From dbf52978ee6bf915b7bb2576b9c39561440b6fca Mon Sep 17 00:00:00 2001 From: Jean-Matthieu Etancelin <jean-matthieu.etancelin@univ-pau.fr> Date: Thu, 25 Mar 2021 11:18:25 +0100 Subject: [PATCH] Fix ci cleanup in docker image (pip uninstall not works without pythonpath export and do not remove INSTALLDIR but INSTALLDIR/lib/site-package/*). Various fixes --- ci/scripts/build_and_test.sh | 3 +- .../codegen/kernels/directional_advection.py | 498 ++++---- .../codegen/kernels/directional_remesh.py | 7 +- .../codegen/kernels/directional_stretching.py | 1021 ++++++++--------- .../device/codegen/kernels/transpose.py | 489 ++++---- hysop/backend/device/codegen/symbolic/cast.py | 106 +- .../kernels/custom_symbolic_time_integrate.py | 68 +- hysop/backend/device/logical_device.py | 83 +- .../operator/directional/stretching_dir.py | 6 +- .../host/fortran/operator/scales_advection.py | 123 +- hysop/backend/host/host_array_backend.py | 2 +- hysop/backend/host/host_operator.py | 74 +- .../operator/directional/advection_dir.py | 184 +-- .../host/python/operator/spatial_filtering.py | 40 +- hysop/core/arrays/array.py | 245 ++-- hysop/core/arrays/array_backend.py | 354 +++--- hysop/core/checkpoints.py | 6 +- hysop/core/graph/allocator.py | 29 +- hysop/core/graph/computational_graph.py | 18 +- hysop/core/tests/test_checkpoint.sh | 17 +- hysop/fields/continuous_field.py | 4 +- hysop/fields/discrete_field.py | 264 +++-- hysop/numerics/fft/fft.py | 100 +- hysop/numerics/fft/gpyfft_fft.py | 645 ++++++----- hysop/numerics/odesolvers/runge_kutta.py | 147 +-- hysop/numerics/remesh/kernel_generator.py | 279 ++--- .../splitting/directional_splitting.py | 6 +- hysop/numerics/stencil/stencil_generator.py | 298 ++--- hysop/operator/base/integrate.py | 2 +- hysop/operator/base/redistribute_operator.py | 4 +- hysop/operator/integrate.py | 54 - hysop/operator/memory_reordering.py | 2 +- hysop/parameters/parameter.py | 10 +- hysop/simulation.py | 5 +- hysop/symbolic/frame.py | 21 +- hysop/symbolic/func.py | 20 +- hysop/symbolic/spectral.py | 114 +- hysop/tools/enum.py | 140 +-- hysop/tools/io_utils.py | 3 +- hysop/tools/misc.py | 61 +- hysop/topology/cartesian_topology.py | 8 - hysop/topology/topology.py | 2 +- 42 files changed, 2804 insertions(+), 2758 deletions(-) diff --git a/ci/scripts/build_and_test.sh b/ci/scripts/build_and_test.sh index 9fcbd1505..29ba655e1 100755 --- a/ci/scripts/build_and_test.sh +++ b/ci/scripts/build_and_test.sh @@ -29,5 +29,4 @@ time ${SCRIPT_DIR}/test.sh "${HYSOP_INSTALL_DIR}" "${HYSOP_DIR}/hysop" # clean everything because image may be commited to retain hysop cache cd -rm -rf /tmp/hysop -pip3.8 uninstall hysop +rm -rf "${HYSOP_DIR}" "${HYSOP_INSTALL_DIR}" diff --git a/hysop/backend/device/codegen/kernels/directional_advection.py b/hysop/backend/device/codegen/kernels/directional_advection.py index 218d4b008..b8b175b3f 100644 --- a/hysop/backend/device/codegen/kernels/directional_advection.py +++ b/hysop/backend/device/codegen/kernels/directional_advection.py @@ -1,4 +1,7 @@ -import contextlib, math, operator, hashlib +import contextlib +import math +import operator +import hashlib from contextlib import contextmanager import numpy as np @@ -16,20 +19,20 @@ from hysop.core.arrays.all import OpenClArray from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.device.codegen.base.kernel_codegen import KernelCodeGenerator -from hysop.backend.device.codegen.base.variables import CodegenVariable, \ - CodegenVectorClBuiltin, CodegenArray +from hysop.backend.device.codegen.base.variables import CodegenVariable, \ + CodegenVectorClBuiltin, CodegenArray from hysop.backend.device.opencl import cl, clTools -from hysop.backend.device.opencl.opencl_types import OpenClTypeGen +from hysop.backend.device.opencl.opencl_types import OpenClTypeGen from hysop.backend.device.opencl.opencl_array_backend import OpenClArrayBackend -from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict -from hysop.backend.device.codegen.base.statistics import WorkStatistics +from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict +from hysop.backend.device.codegen.base.statistics import WorkStatistics -from hysop.backend.device.codegen.base.variables import CodegenStruct +from hysop.backend.device.codegen.base.variables import CodegenStruct from hysop.backend.device.codegen.structs.mesh_info import MeshBaseStruct, MeshInfoStruct -from hysop.backend.device.codegen.structs.indices import GlobalFieldInfos +from hysop.backend.device.codegen.structs.indices import GlobalFieldInfos -from hysop.backend.device.codegen.functions.runge_kutta import RungeKuttaFunction -from hysop.backend.device.codegen.functions.advection_rhs import DirectionalAdvectionRhsFunction +from hysop.backend.device.codegen.functions.runge_kutta import RungeKuttaFunction +from hysop.backend.device.codegen.functions.advection_rhs import DirectionalAdvectionRhsFunction from hysop.numerics.odesolvers.runge_kutta import ExplicitRungeKutta @@ -40,43 +43,44 @@ from hysop.backend.device.kernel_autotuner_config import AutotunerFlags from hysop.fields.discrete_field import DiscreteScalarFieldView + class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): @staticmethod def codegen_name(ftype, is_cached, rk_scheme, nparticles, min_ghosts, - relative_velocity, bilevel, **kargs): + relative_velocity, bilevel, **kargs): cache = '' if is_cached: cache = 'cached' if bilevel is not None: cache += str(bilevel[-1]) cache += '_' - return 'directional_{}advection_{}_{}{}p_{}g__{}'.format(cache,rk_scheme.name(), - ftype[0],nparticles,min_ghosts,abs(hash(relative_velocity))) + return 'directional_{}advection_{}_{}{}p_{}g__{}'.format(cache, rk_scheme.name(), + ftype[0], nparticles, min_ghosts, abs(hash(relative_velocity))) def __init__(self, typegen, work_dim, ftype, - is_cached, rk_scheme, - vboundary, nparticles, - relative_velocity, - offset_by_xmin = False, - min_ghosts = 0, - use_short_circuit = None, - unroll_loops = None, - symbolic_mode = False, - debug_mode = False, - tuning_mode = False, - known_vars = None, - is_bilevel = None): - - assert work_dim>0 and work_dim<=3 - assert nparticles in [1,2,4,8,16] - assert isinstance(relative_velocity, (str,float)) - check_instance(vboundary[0],BoundaryCondition) - check_instance(vboundary[1],BoundaryCondition) + is_cached, rk_scheme, + vboundary, nparticles, + relative_velocity, + offset_by_xmin=False, + min_ghosts=0, + use_short_circuit=None, + unroll_loops=None, + symbolic_mode=False, + debug_mode=False, + tuning_mode=False, + known_vars=None, + is_bilevel=None): + + assert work_dim > 0 and work_dim <= 3 + assert nparticles in [1, 2, 4, 8, 16] + assert isinstance(relative_velocity, (str, float)) + check_instance(vboundary[0], BoundaryCondition) + check_instance(vboundary[1], BoundaryCondition) check_instance(rk_scheme, ExplicitRungeKutta) - if (is_bilevel is not None) and (nparticles>1): - msg='Bilevel support with multiple particles at a time has not been implemented yet.' + if (is_bilevel is not None) and (nparticles > 1): + msg = 'Bilevel support with multiple particles at a time has not been implemented yet.' raise NotImplementedError(msg) known_vars = first_not_None(known_vars, {}) @@ -89,14 +93,14 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): if tuning_mode: unroll_loops = False - is_periodic = (vboundary[0]==BoundaryCondition.PERIODIC \ - and vboundary[1]==BoundaryCondition.PERIODIC) - assert (is_periodic and not is_cached) or min_ghosts>0 + is_periodic = (vboundary[0] == BoundaryCondition.PERIODIC + and vboundary[1] == BoundaryCondition.PERIODIC) + assert (is_periodic and not is_cached) or min_ghosts > 0 cache_size_known = ('local_size' in known_vars or is_bilevel is not None) _global = OpenClCodeGenerator.default_keywords['global'] - _local = OpenClCodeGenerator.default_keywords['local'] + _local = OpenClCodeGenerator.default_keywords['local'] if is_cached: storage = _local @@ -121,38 +125,37 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): typegen, work_dim, itype, ftype, kernel_reqs, is_cached, cache_size_known, debug_mode, known_vars, symbolic_mode) - super(DirectionalAdvectionKernelGenerator,self).__init__( - name=name, - typegen=typegen, - work_dim=work_dim, - kernel_args=kernel_args, - known_vars=known_vars, - vec_type_hint=ftype, - symbolic_mode=symbolic_mode) + super(DirectionalAdvectionKernelGenerator, self).__init__( + name=name, + typegen=typegen, + work_dim=work_dim, + kernel_args=kernel_args, + known_vars=known_vars, + vec_type_hint=ftype, + symbolic_mode=symbolic_mode) self.update_requirements(kernel_reqs) - self.itype = itype - self.ftype = ftype - self.work_dim = work_dim - self.vboundary = vboundary - self.nparticles = nparticles - self.rk_scheme = rk_scheme - self.storage = storage + self.itype = itype + self.ftype = ftype + self.work_dim = work_dim + self.vboundary = vboundary + self.nparticles = nparticles + self.rk_scheme = rk_scheme + self.storage = storage self.cache_size_known = cache_size_known - self.is_periodic = is_periodic - self.is_cached = is_cached - self.is_bilevel = is_bilevel - self.min_ghosts = min_ghosts - self.tuning_mode = tuning_mode - self.offset_by_xmin = offset_by_xmin + self.is_periodic = is_periodic + self.is_cached = is_cached + self.is_bilevel = is_bilevel + self.min_ghosts = min_ghosts + self.tuning_mode = tuning_mode + self.offset_by_xmin = offset_by_xmin self.relative_velocity = relative_velocity self.use_short_circuit = use_short_circuit - self.unroll_loops = unroll_loops + self.unroll_loops = unroll_loops self.gencode() - @classmethod def advec_ghosts(cls, velocity_cfl): """Return the minimal numbers of ghosts required on the lasr axe of the velocity grid.""" @@ -174,14 +177,14 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): def required_workgroup_cache_size(self, local_work_size): """Return a tuple of required (static,dynamic,total) cache bytes per workgroup.""" - work_dim = self.work_dim - ftype = self.ftype - is_cached = self.is_cached - flt_bytes = self.typegen.FLT_BYTES[ftype] + work_dim = self.work_dim + ftype = self.ftype + is_cached = self.is_cached + flt_bytes = self.typegen.FLT_BYTES[ftype] local_work_size = np.asarray(local_work_size) - sc,dc = 0,0 + sc, dc = 0, 0 if is_cached: count = self.nparticles*local_work_size[0]+2*self.min_ghosts if 'local_size' in self.known_vars: @@ -194,18 +197,18 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): dc *= flt_bytes tc = sc+dc - return (sc,dc,tc) + return (sc, dc, tc) def required_workgroup_velocity_cache_size(self): - ftype = self.ftype - flt_bytes = self.typegen.FLT_BYTES[ftype] + ftype = self.ftype + flt_bytes = self.typegen.FLT_BYTES[ftype] c = self.is_bilevel[-1]+2*self.min_ghosts c *= flt_bytes return c - def build_requirements(self,typegen,work_dim,itype,ftype,is_cached,rk_scheme, - vboundary,relative_velocity,nparticles,force_symbolic,storage,is_periodic,known_vars): - tg=typegen + def build_requirements(self, typegen, work_dim, itype, ftype, is_cached, rk_scheme, + vboundary, relative_velocity, nparticles, force_symbolic, storage, is_periodic, known_vars): + tg = typegen reqs = WriteOnceDict() vsize = upper_pow2_or_3(work_dim) @@ -216,80 +219,79 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): mesh_info_struct = MeshInfoStruct(typegen=typegen, vsize=vsize) reqs['MeshInfoStruct'] = mesh_info_struct - field_names = ('V','P') + field_names = ('V', 'P') global_field_infos = GlobalFieldInfos(typegen=typegen, field_names=field_names, - workdim=work_dim, vsize=nparticles) + workdim=work_dim, vsize=nparticles) reqs['GlobalFieldInfos'] = global_field_infos self.field_names = field_names self.field_infos = global_field_infos.build_codegen_variable(name='field_infos') advection_rhs = DirectionalAdvectionRhsFunction(typegen=typegen, work_dim=work_dim, - ftype=ftype, is_cached=is_cached, - boundary=vboundary[0], nparticles=nparticles, - relative_velocity=relative_velocity, - ptr_restrict=True, - itype=itype, field_infos=self.field_infos) + ftype=ftype, is_cached=is_cached, + boundary=vboundary[0], nparticles=nparticles, + relative_velocity=relative_velocity, + ptr_restrict=True, + itype=itype, field_infos=self.field_infos) used_vars = RungeKuttaFunction._default_used_vars.copy() - used_vars['y']='X' - used_vars['step']='rk_step' + used_vars['y'] = 'X' + used_vars['step'] = 'rk_step' runge_kutta = RungeKuttaFunction(typegen=tg, ftype=ftype, - method=rk_scheme, - rhs=advection_rhs, - used_vars=used_vars, - known_args=None) + method=rk_scheme, + rhs=advection_rhs, + used_vars=used_vars, + known_args=None) reqs['runge_kutta'] = runge_kutta return reqs - def gen_kernel_arguments(self, typegen, work_dim, itype, ftype, requirements,is_cached, - cache_size_known, debug_mode, known_vars, symbolic_mode): + def gen_kernel_arguments(self, typegen, work_dim, itype, ftype, requirements, is_cached, + cache_size_known, debug_mode, known_vars, symbolic_mode): kargs = ArgDict() - kargs['dt'] = CodegenVariable(ctype=ftype,name='dt',typegen=typegen, - add_impl_const=True,nl=True) + kargs['dt'] = CodegenVariable(ctype=ftype, name='dt', typegen=typegen, + add_impl_const=True, nl=True) mesh_dim = upper_pow2_or_3(work_dim) self.velocity, self.velocity_strides = OpenClArrayBackend.build_codegen_arguments(kargs, - name='V', - known_vars=known_vars, symbolic_mode=symbolic_mode, - storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, - ptr_restrict=True, const=True) + name='V', + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, + ptr_restrict=True, const=True) self.position, self.position_strides = OpenClArrayBackend.build_codegen_arguments(kargs, - name='P', - known_vars=known_vars, symbolic_mode=symbolic_mode, - storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, - ptr_restrict=True, const=False) - + name='P', + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, + ptr_restrict=True, const=False) if debug_mode: - kargs['dbg0'] = CodegenVariable(storage=self._global,name='dbg0',ctype=itype, - typegen=typegen, ptr_restrict=True,ptr=True,const=False,add_impl_const=True) - kargs['dbg1'] = CodegenVariable(storage=self._global,name='dbg1',ctype=itype, - typegen=typegen, ptr_restrict=True,ptr=True,const=False,add_impl_const=True) + kargs['dbg0'] = CodegenVariable(storage=self._global, name='dbg0', ctype=itype, + typegen=typegen, ptr_restrict=True, ptr=True, const=False, add_impl_const=True) + kargs['dbg1'] = CodegenVariable(storage=self._global, name='dbg1', ctype=itype, + typegen=typegen, ptr_restrict=True, ptr=True, const=False, add_impl_const=True) kargs['V_mesh_info'] = requirements['MeshInfoStruct'].build_codegen_variable( - const=True, name='V_mesh_info') + const=True, name='V_mesh_info') kargs['P_mesh_info'] = requirements['MeshInfoStruct'].build_codegen_variable( - const=True, name='P_mesh_info') + const=True, name='P_mesh_info') if is_cached and not cache_size_known: - _local = OpenClCodeGenerator.default_keywords['local'] + _local = OpenClCodeGenerator.default_keywords['local'] kargs['Vc'] = CodegenVariable(storage=_local, ctype=ftype, add_impl_const=True, - name='Vc', ptr=True, ptr_restrict=True, typegen=typegen, nl=False) + name='Vc', ptr=True, ptr_restrict=True, typegen=typegen, nl=False) return kargs def gencode(self): - s = self + s = self tg = s.typegen - work_dim = s.work_dim - itype = s.itype - ftype = s.ftype - vboundary = s.vboundary - storage = s.storage + work_dim = s.work_dim + itype = s.itype + ftype = s.ftype + vboundary = s.vboundary + storage = s.storage nparticles = s.nparticles min_ghosts = s.min_ghosts field_infos = s.field_infos @@ -298,24 +300,24 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): symbolic_mode = s.symbolic_mode use_short_circuit = s.use_short_circuit - is_periodic = s.is_periodic - is_cached = s.is_cached + is_periodic = s.is_periodic + is_cached = s.is_cached cache_size_known = s.cache_size_known vtype = tg.vtype(ftype, work_dim) pvtype = tg.vtype(ftype, nparticles) - global_id = s.vars['global_id'] - local_id = s.vars['local_id'] - group_id = s.vars['group_id'] + global_id = s.vars['global_id'] + local_id = s.vars['local_id'] + group_id = s.vars['group_id'] - global_index = s.vars['global_index'] - local_index = s.vars['local_index'] + global_index = s.vars['global_index'] + local_index = s.vars['local_index'] - global_size = s.vars['global_size'] - local_size = s.vars['local_size'] + global_size = s.vars['global_size'] + local_size = s.vars['local_size'] - dt = s.vars['dt'] + dt = s.vars['dt'] position_mesh_info = s.vars['P_mesh_info'] velocity_mesh_info = s.vars['V_mesh_info'] @@ -329,24 +331,24 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): "In bilevel, velocity must be cached " + str(is_cached) + " " + str(cache_size_known) compute_grid_size = position_mesh_info['local_mesh']['compute_resolution'].view( - 'p_compute_grid_size', slice(0, work_dim), const=True) + 'p_compute_grid_size', slice(0, work_dim), const=True) if has_bilevel: v_grid_size = velocity_mesh_info['local_mesh']['resolution'].view( - 'v_grid_size', slice(0, work_dim), const=True) + 'v_grid_size', slice(0, work_dim), const=True) P_grid_ghosts = position_mesh_info['ghosts'].view( - 'P_grid_ghosts', slice(0,work_dim), const=True) + 'P_grid_ghosts', slice(0, work_dim), const=True) V_grid_ghosts = velocity_mesh_info['ghosts'].view( - 'V_grid_ghosts', slice(0,work_dim), const=True) + 'V_grid_ghosts', slice(0, work_dim), const=True) - dx = position_mesh_info['dx'].view('p_dx', slice(0,work_dim), const=True) - inv_dx = position_mesh_info['inv_dx'].view('p_inv_dx', slice(0,work_dim), const=True) - v_dx = velocity_mesh_info['dx'].view('v_dx', slice(0,work_dim), const=True) - v_inv_dx = velocity_mesh_info['inv_dx'].view('v_inv_dx', slice(0,work_dim), const=True) + dx = position_mesh_info['dx'].view('p_dx', slice(0, work_dim), const=True) + inv_dx = position_mesh_info['inv_dx'].view('p_inv_dx', slice(0, work_dim), const=True) + v_dx = velocity_mesh_info['dx'].view('v_dx', slice(0, work_dim), const=True) + v_inv_dx = velocity_mesh_info['inv_dx'].view('v_inv_dx', slice(0, work_dim), const=True) xmin = CodegenVariable(name='xmin', ctype=ftype, typegen=tg, const=True, - init='{} + {}*{}'.format(position_mesh_info['local_mesh']['xmin'][0], - P_grid_ghosts[0], dx[0])) + init='{} + {}*{}'.format(position_mesh_info['local_mesh']['xmin'][0], + P_grid_ghosts[0], dx[0])) position_gid = CodegenVectorClBuiltin('P_gid', itype, work_dim, typegen=tg) velocity_gid = CodegenVectorClBuiltin('V_gid', itype, work_dim, typegen=tg) @@ -357,27 +359,27 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): velocity_ix = CodegenVariable(name='V_gid_x', ctype=itype, typegen=tg, const=True) line_offset_for_v = CodegenVariable(name='line_offset_for_v', ctype=itype, typegen=tg, const=False) - runge_kutta = self.reqs['runge_kutta'] + runge_kutta = self.reqs['runge_kutta'] advec_ghosts = CodegenVariable('V_advec_ghosts', itype, typegen=tg, value=min_ghosts) - line_offset = CodegenVariable(name='line_offset', ctype=itype, typegen=tg, const=True) - line_work = CodegenVariable(name='line_work', ctype=itype, typegen=tg, const=True) + line_offset = CodegenVariable(name='line_offset', ctype=itype, typegen=tg, const=True) + line_work = CodegenVariable(name='line_work', ctype=itype, typegen=tg, const=True) line_velocity = CodegenVariable(name='Vl', ctype=ftype, ptr=True, - storage='__global', ptr_restrict=True, ptr_const=True, const=True, - typegen=tg) + storage='__global', ptr_restrict=True, ptr_const=True, const=True, + typegen=tg) line_position = CodegenVariable(name='Pl', ctype=ftype, ptr=True, - storage='__global', ptr_restrict=True, ptr_const=True, const=False, - typegen=tg) + storage='__global', ptr_restrict=True, ptr_const=True, const=False, + typegen=tg) - X = CodegenVectorClBuiltin('X', ftype, nparticles, typegen=tg) - pid = CodegenVectorClBuiltin('pid', itype, nparticles, typegen=tg, const=True) - poffset = CodegenVectorClBuiltin('poffset', itype, nparticles, typegen=tg) + X = CodegenVectorClBuiltin('X', ftype, nparticles, typegen=tg) + pid = CodegenVectorClBuiltin('pid', itype, nparticles, typegen=tg, const=True) + poffset = CodegenVectorClBuiltin('poffset', itype, nparticles, typegen=tg) npart = CodegenVariable(name='npart', ctype=itype, typegen=tg, init=nparticles) if is_cached: V_cache_width = CodegenVariable('V_cache_width', itype, typegen=tg, - const=True) + const=True) if cache_size_known: if has_bilevel: # if bilevel, velocity cache is the entire local line @@ -393,7 +395,7 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): else: L = s.known_vars['local_size'][0] Vc_shape = (nparticles*L+2*min_ghosts,) - Vc = CodegenArray(name='Vc',dim=1,ctype=ftype,typegen=tg, + Vc = CodegenArray(name='Vc', dim=1, ctype=ftype, typegen=tg, shape=Vc_shape, storage='__local') else: Vc = s.vars['Vc'] @@ -405,34 +407,34 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): is_active = CodegenVariable('is_active', 'bool', tg, const=True) kmax = CodegenVariable('kmax', itype, tg, const=True, - init='({Sx}+{npart}*{Lx}-1)/({npart}*{Lx})'.format( - Sx=compute_grid_size[0], npart=npart(),Lx=local_size[0])) + init='({Sx}+{npart}*{Lx}-1)/({npart}*{Lx})'.format( + Sx=compute_grid_size[0], npart=npart(), Lx=local_size[0])) @contextmanager def _work_iterate_(i): try: - if i==0: - fval = 0 - gsize = 1 - N = kmax + if i == 0: + fval = 0 + gsize = 1 + N = kmax else: fval = global_id[i] gsize = global_size[i] - N = '{Sx}'.format(Sx=compute_grid_size[i]) + N = '{Sx}'.format(Sx=compute_grid_size[i]) position_ghosts = P_grid_ghosts[i] velocity_ghosts = V_grid_ghosts[i] with s._for_('int {i}={fval}; {i}<{N}; {i}+={gsize}'.format( - i='kji'[i], fval=fval, gsize=gsize,N=N), - unroll=(i==0) and self.unroll_loops) as ctx: - if i==0: + i='kji'[i], fval=fval, gsize=gsize, N=N), + unroll=(i == 0) and self.unroll_loops) as ctx: + if i == 0: with s._align_() as al: is_last.declare(al, align=True, - init='({} == ({}-1))'.format('kji'[0], kmax)) + init='({} == ({}-1))'.format('kji'[0], kmax)) is_active.declare(al, align=True, - init='({k}*{ls}+{lid} <= ({gs}+{npart}-1)/{npart})'.format( - npart=npart, lid=local_id[0], k='kji'[0], - gs=compute_grid_size[0], ls=local_size[0])) + init='({k}*{ls}+{lid} <= ({gs}+{npart}-1)/{npart})'.format( + npart=npart, lid=local_id[0], k='kji'[0], + gs=compute_grid_size[0], ls=local_size[0])) s.jumpline() with s._align_() as al: @@ -452,10 +454,10 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): v1 = '{}.pos.X'.format(v) mi = s.vars['{}_mesh_info'.format(k)] s.append('{} = {} + {} + {};'.format(v0, mi['start'][0], line_offset, - poffset)) + poffset)) s.append('{} = {} + convert_{}({})*{};'.format(v1, - mi['global_mesh']['xmin'][0], - pvtype, v0, mi['dx'][0])) + mi['global_mesh']['xmin'][0], + pvtype, v0, mi['dx'][0])) s.jumpline() position_gid.affect(i=0, codegen=s, init='{} + {}'.format( @@ -465,9 +467,9 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): line_offset, velocity_ghosts, advec_ghosts)) line_velocity_offset = ' + '.join('{}*{}'.format(velocity_gid[j], velocity_strides[j]) - for j in range(work_dim-1, -1, -1)) + for j in range(work_dim-1, -1, -1)) line_position_offset = ' + '.join('{}*{}'.format(position_gid[j], position_strides[j]) - for j in range(work_dim-1, -1, -1)) + for j in range(work_dim-1, -1, -1)) with s._align_() as al: line_position.declare(al, init='{} + {}'.format(position, line_position_offset), align=True) @@ -482,7 +484,7 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): mi = s.vars['{}_mesh_info'.format(k)] s.append('{} = {} + {};'.format(v0, mi['start'][i], 'kji'[i])) s.append('{} = {} + {}*{};'.format(v1, mi['global_mesh']['xmin'][i], - v0, mi['dx'][i])) + v0, mi['dx'][i])) s.jumpline() s.append('{} = {} + {};'.format(position_gid[i], 'kji'[i], position_ghosts)) @@ -492,15 +494,15 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): s.append('{} = convert_int_rtn({});'.format(velocity_gid[i], velocity_gid_pos[i])) s.append('{} = {} - convert_{}({});'.format( velocity_h[i], velocity_gid_pos[i], part_ftype, velocity_gid[i])) - if i==1: - if work_dim==3: + if i == 1: + if work_dim == 3: line_velocity_offset = '({}+{})*{}+({}+{})*{}+{}-{}'.format( - velocity_gid[2],V_grid_ghosts[2], velocity_strides[2], - velocity_gid[1],V_grid_ghosts[1], velocity_strides[1], + velocity_gid[2], V_grid_ghosts[2], velocity_strides[2], + velocity_gid[1], V_grid_ghosts[1], velocity_strides[1], V_grid_ghosts[0], advec_ghosts) - elif work_dim==2: + elif work_dim == 2: line_velocity_offset = '({}+{})*{}+{}-{}'.format( - velocity_gid[1],V_grid_ghosts[1], velocity_strides[1], + velocity_gid[1], V_grid_ghosts[1], velocity_strides[1], V_grid_ghosts[0], advec_ghosts) s.jumpline() with s._align_() as al: @@ -510,30 +512,30 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): if velocity_cache_full_length: s.comment("Load velocity in cache with linear interpolation") with s._for_('int {i}={fval}; {i}<{N}; {i}+={gsize}'.format( - i=velocity_ix, fval=local_id[0], gsize=local_size[0],N=Vc_shape[0]), - unroll=False): + i=velocity_ix, fval=local_id[0], gsize=local_size[0], N=Vc_shape[0]), + unroll=False): is_first = True - if work_dim==3: - for ii,jj in [(ii,jj) for ii in range(2) for jj in range(2)]: + if work_dim == 3: + for ii, jj in [(ii, jj) for ii in range(2) for jj in range(2)]: s.append('{}[{}] {} ({}{})*({}{})*{};'.format( Vc, velocity_ix, " =" if is_first else "+=", - "1.0-" if ii==0 else " ", velocity_h[2], - "1.0-" if jj==0 else " ", velocity_h[1], + "1.0-" if ii == 0 else " ", velocity_h[2], + "1.0-" if jj == 0 else " ", velocity_h[1], s.vload(n=nparticles, ptr=line_velocity, - offset='({})*{}+({})*{}+{}*{}'.format( - ii, velocity_strides[2], - jj, velocity_strides[1], - velocity_ix, velocity_strides[0])))) + offset='({})*{}+({})*{}+{}*{}'.format( + ii, velocity_strides[2], + jj, velocity_strides[1], + velocity_ix, velocity_strides[0])))) is_first = False - elif work_dim==2: + elif work_dim == 2: for ii in range(2): s.append('{}[{}] {} ({}{})*{};'.format( Vc, velocity_ix, " =" if is_first else "+=", - "1.0-" if ii==0 else " ", velocity_h[1], - s.vload(n=nparticles,ptr=line_velocity, - offset='({})*{}+{}*{}'.format( - ii, velocity_strides[1], - velocity_ix, velocity_strides[0])))) + "1.0-" if ii == 0 else " ", velocity_h[1], + s.vload(n=nparticles, ptr=line_velocity, + offset='({})*{}+{}*{}'.format( + ii, velocity_strides[1], + velocity_ix, velocity_strides[0])))) is_first = False else: raise RuntimeError("Bilevel advection 1D need further developments") @@ -550,26 +552,26 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): field_infos.declare(s) s.jumpline() with s._align_() as al: - global_id.declare(al,align=True,const=True) - local_id.declare(al,align=True,const=True) - global_size.declare(al,align=True,const=True) - local_size.declare(al,align=True,const=True) + global_id.declare(al, align=True, const=True) + local_id.declare(al, align=True, const=True) + global_size.declare(al, align=True, const=True) + local_size.declare(al, align=True, const=True) s.jumpline() with s._align_() as al: - compute_grid_size.declare(al,align=True) + compute_grid_size.declare(al, align=True) if has_bilevel: - v_grid_size.declare(al,align=True) - P_grid_ghosts.declare(al,align=True) - V_grid_ghosts.declare(al,align=True) + v_grid_size.declare(al, align=True) + P_grid_ghosts.declare(al, align=True) + V_grid_ghosts.declare(al, align=True) al.jumpline() dx.declare(al, align=True) - inv_dx.declare(al,align=True) + inv_dx.declare(al, align=True) if has_bilevel: v_dx.declare(al, align=True) - v_inv_dx.declare(al,align=True) + v_inv_dx.declare(al, align=True) s.jumpline() - xmin.declare(al,align=True) + xmin.declare(al, align=True) s.jumpline() with s._align_() as al: @@ -588,7 +590,7 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): s.jumpline() if is_cached and cache_size_known: - Vc.declare(s); + Vc.declare(s) s.jumpline() kmax.declare(s) @@ -602,15 +604,15 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): if is_cached and not has_bilevel: if tuning_mode: loop = 'int {i}={Lx}; {i}<{N}; {i}+={gsize}'.format( - i='idx', N=V_cache_width, - Lx=local_id[0], gsize=local_size[0]) + i='idx', N=V_cache_width, + Lx=local_id[0], gsize=local_size[0]) with s._for_(loop): - code='{dst}[{i}] = 0.5;'.format(i='idx', dst=Vc) + code = '{dst}[{i}] = 0.5;'.format(i='idx', dst=Vc) s.append(code) s.barrier(_local=True) else: - code='event_t event = async_work_group_copy({dst}, {src}, {ne}, {event});'.format( - dst=Vc, src=line_velocity, ne=V_cache_width, event=0) + code = 'event_t event = async_work_group_copy({dst}, {src}, {ne}, {event});'.format( + dst=Vc, src=line_velocity, ne=V_cache_width, event=0) s.append(code) code = 'wait_group_events(1, &event);' s.append(code) @@ -621,33 +623,33 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): mesh_ratio = 1 s_mesh_size = position_mesh_info['global_mesh']['compute_resolution'].value[0] v_mesh_size = velocity_mesh_info['global_mesh']['compute_resolution'].value[0] - if (s_mesh_size%v_mesh_size==0): + if (s_mesh_size % v_mesh_size == 0): mesh_ratio = s_mesh_size // v_mesh_size with s._if_('k%{}==0'.format(mesh_ratio)): s.append('{} = convert_int_rtn(convert_{}({}+1)*{}*{});'.format(line_offset_for_v, ftype, line_offset, dx[0], v_inv_dx[0])) with s._for_('int {i}={fval}; {i}<{N} && ({i}+{o})<{Nv}; {i}+={gsize}'.format( - i=velocity_ix, fval=local_id[0], gsize=local_size[0], - N=Vc_shape[0],o=line_offset_for_v, Nv=v_grid_size[0]), - unroll=False): - if work_dim==3: + i=velocity_ix, fval=local_id[0], gsize=local_size[0], + N=Vc_shape[0], o=line_offset_for_v, Nv=v_grid_size[0]), + unroll=False): + if work_dim == 3: is_first = True - for ii,jj in [(ii,jj) for ii in range(2) for jj in range(2)]: + for ii, jj in [(ii, jj) for ii in range(2) for jj in range(2)]: s.append('{}[{}] {} ({}{})*({}{})*{};'.format( Vc, velocity_ix, " =" if is_first else "+=", - "1.0-" if ii==0 else " ",velocity_h[2], - "1.0-" if jj==0 else " ",velocity_h[1], - s.vload(n=nparticles,ptr=line_velocity,offset='({})*{}+({})*{}+({}+{})*{}'.format( + "1.0-" if ii == 0 else " ", velocity_h[2], + "1.0-" if jj == 0 else " ", velocity_h[1], + s.vload(n=nparticles, ptr=line_velocity, offset='({})*{}+({})*{}+({}+{})*{}'.format( ii, velocity_strides[2], jj, velocity_strides[1], line_offset_for_v, velocity_ix, velocity_strides[0])))) is_first = False - elif work_dim==2: + elif work_dim == 2: for ii in range(2): s.append('{}[{}] {} ({}{})*{};'.format( Vc, velocity_ix, " =" if is_first else "+=", - "1.0-" if jj==0 else " ",velocity_h[1], - s.vload(n=nparticles,ptr=line_velocity,offset='({}+{})*{}+({}+{})*{}'.format( + "1.0-" if jj == 0 else " ", velocity_h[1], + s.vload(n=nparticles, ptr=line_velocity, offset='({})*{}+({}+{})*{}'.format( jj, velocity_strides[1], line_offset_for_v, velocity_ix, velocity_strides[0])))) @@ -657,9 +659,9 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): with s._if_(is_active()): pid.declare(s, init='{} + {}*{} + {}'.format(line_offset, local_id[0], - npart, poffset)) + npart, poffset)) X.declare(s, init='convert_{}(min({},{}-1))*{}'.format(part_ftype, pid, - compute_grid_size[0], dx[0])); + compute_grid_size[0], dx[0])) s.jumpline() # Modify for bilevel interpolation : in this case, the line @@ -679,11 +681,11 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): 'inv_dx': inv_dx[0], 'line_offset': line_offset, 'field_infos': '&{}'.format(field_infos), - } + } if is_cached: - rk_args['line_velocity'] = Vc + rk_args['line_velocity'] = Vc else: - rk_args['line_velocity'] = line_velocity + rk_args['line_velocity'] = line_velocity rk_args['line_velocity'] = '{}+{}'.format(rk_args['line_velocity'], min_ghosts) if is_periodic: rk_args['line_width'] = compute_grid_size[0] @@ -696,44 +698,44 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): s.jumpline() s.vstore_if(cond=is_last, - scalar_cond=lambda i: '{} < {}'.format(pid[i], compute_grid_size[0]), - n=nparticles, ptr=line_position, - offset=local_id[0], data=X, offset_is_ftype=False, - use_short_circuit=use_short_circuit) + scalar_cond=lambda i: '{} < {}'.format(pid[i], compute_grid_size[0]), + n=nparticles, ptr=line_position, + offset=local_id[0], data=X, offset_is_ftype=False, + use_short_circuit=use_short_circuit) if __name__ == '__main__': from hysop.backend.device.opencl import cl from hysop.backend.device.codegen.base.test import _test_mesh_info, _test_typegen - work_dim=3 - ghosts=(0,0,0) - vresolution=(128,64,32) - presolution=(128,64,32) - local_size = (1024,1,1) - global_size = (16050,55,440) + work_dim = 3 + ghosts = (0, 0, 0) + vresolution = (128, 64, 32) + presolution = (128, 64, 32) + local_size = (1024, 1, 1) + global_size = (16050, 55, 440) tg = _test_typegen('float') - (_,vmesh_info) = _test_mesh_info('velocity_mesh_info',tg,work_dim,ghosts,vresolution) - (_,pmesh_info) = _test_mesh_info('position_mesh_info',tg,work_dim,ghosts,presolution) + (_, vmesh_info) = _test_mesh_info('velocity_mesh_info', tg, work_dim, ghosts, vresolution) + (_, pmesh_info) = _test_mesh_info('position_mesh_info', tg, work_dim, ghosts, presolution) dak = DirectionalAdvectionKernelGenerator(typegen=tg, ftype=tg.fbtype, - work_dim=work_dim, - rk_scheme=ExplicitRungeKutta('Euler'), - vboundary=(BoundaryCondition.PERIODIC,BoundaryCondition.PERIODIC), - is_cached=True, - min_ghosts=10, - symbolic_mode=True, - relative_velocity=0.66, - nparticles=4, # MONOLEVEL TEST - tuning_mode = True, - #nparticles=1, is_bilevel=(256,64,32), # BILEVEL TEST - known_vars=dict( - V_mesh_info=vmesh_info, - P_mesh_info=pmesh_info, - local_size=local_size[:work_dim], - global_size=global_size[:work_dim] - ) - ) + work_dim=work_dim, + rk_scheme=ExplicitRungeKutta('Euler'), + vboundary=(BoundaryCondition.PERIODIC, BoundaryCondition.PERIODIC), + is_cached=True, + min_ghosts=10, + symbolic_mode=True, + relative_velocity=0.66, + nparticles=4, # MONOLEVEL TEST + tuning_mode=True, + # nparticles=1, is_bilevel=(256,64,32), # BILEVEL TEST + known_vars=dict( + V_mesh_info=vmesh_info, + P_mesh_info=pmesh_info, + local_size=local_size[:work_dim], + global_size=global_size[:work_dim] + ) + ) dak.edit() dak.test_compile() diff --git a/hysop/backend/device/codegen/kernels/directional_remesh.py b/hysop/backend/device/codegen/kernels/directional_remesh.py index 0027087b8..8de4fa5b4 100644 --- a/hysop/backend/device/codegen/kernels/directional_remesh.py +++ b/hysop/backend/device/codegen/kernels/directional_remesh.py @@ -1,4 +1,7 @@ -import contextlib, math, operator, hashlib +import contextlib +import math +import operator +import hashlib from contextlib import contextmanager import numpy as np @@ -910,7 +913,7 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): with if_thread_active(not_first=False): with s._align_() as al: if debug_mode: - s.append('printf("%i wrote {vnf} from cache position %i to global id %i.\\n", {},{},{},{});'.format( + s.append('printf("%i wrote {vnf} from cache position %i to global id %i.\\n", {},{},{},{},{},{},{});'.format( local_id[0], epv(self.vload(nparticles, cached_scalars[0][0], local_offset)), local_offset, particle_offset, compute_grid_size[0], cache_ghosts, compute_grid_size[0], diff --git a/hysop/backend/device/codegen/kernels/directional_stretching.py b/hysop/backend/device/codegen/kernels/directional_stretching.py index 6c64a8817..2d35cfb79 100644 --- a/hysop/backend/device/codegen/kernels/directional_stretching.py +++ b/hysop/backend/device/codegen/kernels/directional_stretching.py @@ -1,4 +1,6 @@ -import operator, hashlib, contextlib +import operator +import hashlib +import contextlib import numpy as np from hysop import __VERBOSE__, __KERNEL_DEBUG__ @@ -7,7 +9,7 @@ from hysop.tools.misc import Utils, upper_pow2_or_3 from hysop.tools.types import check_instance from hysop.tools.contexts import nested -from hysop.constants import DirectionLabels, BoundaryCondition, Backend, SpaceDiscretization +from hysop.constants import DirectionLabels, BoundaryCondition, Backend, SpaceDiscretization, AutotunerFlags from hysop.methods import StretchingFormulation from hysop.fields.discrete_field import DiscreteScalarFieldView @@ -15,64 +17,63 @@ from hysop.backend.device.opencl import clTools from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.device.codegen.base.kernel_codegen import KernelCodeGenerator -from hysop.backend.device.codegen.base.variables import CodegenVariable, \ - CodegenVectorClBuiltin, CodegenArray -from hysop.backend.device.opencl.opencl_types import OpenClTypeGen -from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict -from hysop.backend.device.codegen.base.statistics import WorkStatistics - -from hysop.backend.device.codegen.base.variables import CodegenStruct -from hysop.backend.device.codegen.structs.mesh_info import MeshBaseStruct, MeshInfoStruct - -from hysop.backend.device.codegen.functions.compute_index import ComputeIndexFunction -from hysop.backend.device.codegen.functions.cache_load import CacheLoadFunction -from hysop.backend.device.codegen.functions.runge_kutta import RungeKuttaFunction +from hysop.backend.device.codegen.base.variables import CodegenVariable, \ + CodegenVectorClBuiltin, CodegenArray +from hysop.backend.device.opencl.opencl_types import OpenClTypeGen +from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict +from hysop.backend.device.codegen.base.statistics import WorkStatistics + +from hysop.backend.device.codegen.base.variables import CodegenStruct +from hysop.backend.device.codegen.structs.mesh_info import MeshBaseStruct, MeshInfoStruct + +from hysop.backend.device.codegen.functions.compute_index import ComputeIndexFunction +from hysop.backend.device.codegen.functions.cache_load import CacheLoadFunction +from hysop.backend.device.codegen.functions.runge_kutta import RungeKuttaFunction from hysop.backend.device.codegen.functions.stretching_rhs \ - import DirectionalStretchingRhsFunction + import DirectionalStretchingRhsFunction from hysop.numerics.odesolvers.runge_kutta import ExplicitRungeKutta from hysop.backend.device.opencl import cl, clCharacterize from hysop.backend.device.opencl.opencl_env import OpenClEnvironment -#from hysop.backend.device.opencl.opencl_kernel import OpenClKernelLauncher -#from hysop.backend.device.kernel_autotuner import KernelAutotuner, AutotunerFlags, \ - #KernelGenerationError +from hysop.backend.device.kernel_autotuner import KernelAutotuner, KernelGenerationError + class DirectionalStretchingKernel(KernelCodeGenerator): @staticmethod - def codegen_name(ftype,is_cached,is_inplace,direction,formulation): - inplace ='inplace_' if is_inplace else '' + def codegen_name(ftype, is_cached, is_inplace, direction, formulation): + inplace = 'inplace_' if is_inplace else '' cache = 'cached_' if is_cached else '' sformulation = str(formulation).lower() - return 'directional_{}{}stretching_{}_{}{}'.format(cache,inplace,sformulation, - ftype[0],DirectionLabels[direction]) + return 'directional_{}{}stretching_{}_{}{}'.format(cache, inplace, sformulation, + ftype[0], DirectionLabels[direction]) def __init__(self, typegen, dim, ftype, order, direction, - is_cached, is_inplace, - boundary, formulation, time_integrator, - symbolic_mode = False, - known_vars = None): + is_cached, is_inplace, + boundary, formulation, time_integrator, + symbolic_mode=False, + known_vars=None): check_instance(formulation, StretchingFormulation) check_instance(boundary[0], BoundaryCondition) check_instance(boundary[1], BoundaryCondition) - check_instance(time_integrator,ExplicitRungeKutta) + check_instance(time_integrator, ExplicitRungeKutta) check_instance(is_inplace, bool) - assert dim==3 - assert direction<dim - assert order>1 and order%2==0 - assert boundary[0] in [BoundaryCondition.NONE,BoundaryCondition.PERIODIC] - assert boundary[1] in [BoundaryCondition.NONE,BoundaryCondition.PERIODIC] + assert dim == 3 + assert direction < dim + assert order > 1 and order % 2 == 0 + assert boundary[0] in [BoundaryCondition.NONE, BoundaryCondition.PERIODIC] + assert boundary[1] in [BoundaryCondition.NONE, BoundaryCondition.PERIODIC] if known_vars is None: known_vars = {} local_size_known = ('local_size' in known_vars) - is_conservative = (formulation==StretchingFormulation.CONSERVATIVE) - is_periodic = (boundary[0]==BoundaryCondition.PERIODIC) \ - and (boundary[1]==BoundaryCondition.PERIODIC) + is_conservative = (formulation == StretchingFormulation.CONSERVATIVE) + is_periodic = (boundary[0] == BoundaryCondition.PERIODIC) \ + and (boundary[1] == BoundaryCondition.PERIODIC) if is_cached: storage = OpenClCodeGenerator.default_keywords['local'] @@ -82,45 +83,45 @@ class DirectionalStretchingKernel(KernelCodeGenerator): if is_inplace and is_conservative and not is_cached: raise ValueError('Inplace conservetive stretching requires caching.') - name = DirectionalStretchingKernel.codegen_name(ftype,is_cached,is_inplace, - direction,formulation) + name = DirectionalStretchingKernel.codegen_name(ftype, is_cached, is_inplace, + direction, formulation) kernel_reqs = self.build_requirements(typegen, dim, ftype, order, is_cached, - time_integrator, direction, boundary, symbolic_mode, formulation, storage, - is_periodic, is_inplace) + time_integrator, direction, boundary, symbolic_mode, formulation, storage, + is_periodic, is_inplace) kernel_args = self.gen_kernel_arguments(typegen, dim, ftype, kernel_reqs, is_cached, - is_inplace, local_size_known) + is_inplace, local_size_known) - super(DirectionalStretchingKernel,self).__init__( - name=name, - typegen=typegen, - work_dim=dim, - kernel_args=kernel_args, - known_vars=known_vars, - vec_type_hint=ftype, - symbolic_mode=symbolic_mode) + super(DirectionalStretchingKernel, self).__init__( + name=name, + typegen=typegen, + work_dim=dim, + kernel_args=kernel_args, + known_vars=known_vars, + vec_type_hint=ftype, + symbolic_mode=symbolic_mode) self.update_requirements(kernel_reqs) - self.order = order - self.ftype = ftype - self.direction = direction - self.dim = dim - self.boundary = boundary - self.time_integrator = time_integrator - self.formulation = formulation - self.storage = storage + self.order = order + self.ftype = ftype + self.direction = direction + self.dim = dim + self.boundary = boundary + self.time_integrator = time_integrator + self.formulation = formulation + self.storage = storage self.local_size_known = local_size_known - self.is_conservative = is_conservative - self.is_periodic = is_periodic - self.is_cached = is_cached - self.is_inplace = is_inplace + self.is_conservative = is_conservative + self.is_periodic = is_periodic + self.is_cached = is_cached + self.is_inplace = is_inplace self.gencode() - #return minimal number of ghosts required on the grid + # return minimal number of ghosts required on the grid # for input velocity and vorticity. @staticmethod def min_ghosts(boundary, formulation, order, time_integrator, direction): @@ -130,7 +131,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): (rboundary == BoundaryCondition.PERIODIC): pass elif lboundary in [BoundaryCondition.NONE, BoundaryCondition.PERIODIC]: - assert order%2==0 + assert order % 2 == 0 stencil_ghost = order//2 if formulation == StretchingFormulation.CONSERVATIVE: u_ghosts[direction] = time_integrator.stages * stencil_ghost @@ -154,30 +155,30 @@ class DirectionalStretchingKernel(KernelCodeGenerator): ghosts[0] = time_integrator.stages * stencil_ghost else: ghosts[0] = stencil_ghost - ghosts[0]*=4 + ghosts[0] *= 4 return np.asarray(ghosts) - #return global_work_size from effective work_size without + # return global_work_size from effective work_size without # taking into account local_work_size alignment @staticmethod def get_max_global_size(work_size, work_load, **kargs): - work_size = np.asarray(work_size) - work_load = np.asarray(work_load) + work_size = np.asarray(work_size) + work_load = np.asarray(work_load) assert work_load[0] == 1 global_size = work_size.copy() global_size = ((global_size+work_load-1)//work_load) return global_size - #return global_work_size from effective work_size and given local_work_size + # return global_work_size from effective work_size and given local_work_size # global_work_size will be a multiple of local_work_size - def get_global_size(self, work_size, local_work_size, work_load=[1,1,1]): - work_size = np.asarray(work_size) - work_load = np.asarray(work_load) + def get_global_size(self, work_size, local_work_size, work_load=[1, 1, 1]): + work_size = np.asarray(work_size) + work_load = np.asarray(work_load) local_work_size = np.asarray(local_work_size) v_min_ghosts, w_min_ghosts = self._min_ghosts() - assert (local_work_size[1] == 1) and (local_work_size[2]==1) + assert (local_work_size[1] == 1) and (local_work_size[2] == 1) assert (local_work_size > 2*w_min_ghosts).all() if 'local_size' in self.known_vars: @@ -185,11 +186,11 @@ class DirectionalStretchingKernel(KernelCodeGenerator): if 'velocity_mesh_info' in self.known_vars and (not self.is_periodic): velocity_mesh_info = self.known_vars['velocity_mesh_info'].value vghosts = velocity_mesh_info['ghosts'][:3] - assert (vghosts>=v_min_ghosts).all() + assert (vghosts >= v_min_ghosts).all() if 'vorticity_mesh_info' in self.known_vars and (not self.is_periodic): vorticity_mesh_info = self.known_vars['vorticity_mesh_info'].value wghosts = vorticity_mesh_info['ghosts'][:3] - assert (wghosts>=w_min_ghosts).all() + assert (wghosts >= w_min_ghosts).all() max_global_size = self.get_max_global_size(work_size, work_load) max_global_size[0] = local_work_size[0] @@ -197,20 +198,20 @@ class DirectionalStretchingKernel(KernelCodeGenerator): return global_size - #return a tuple of required (static,dynamic,total) cache bytes per workgroup + # return a tuple of required (static,dynamic,total) cache bytes per workgroup def required_workgroup_cache_size(self, local_work_size): - dim = self.work_dim - ftype = self.ftype - is_cached = self.is_cached - direction = self.direction - cache_ghosts = self._cache_ghosts() - is_periodic = self.is_periodic + dim = self.work_dim + ftype = self.ftype + is_cached = self.is_cached + direction = self.direction + cache_ghosts = self._cache_ghosts() + is_periodic = self.is_periodic is_conservative = self.is_conservative - flt_bytes = self.typegen.FLT_BYTES[ftype] + flt_bytes = self.typegen.FLT_BYTES[ftype] local_work_size = np.asarray(local_work_size) - sc,dc = 0,0 + sc, dc = 0, 0 if is_cached: count = dim*local_work_size[0] if is_conservative: @@ -230,7 +231,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): dc *= flt_bytes tc = sc+dc - return (sc,dc,tc) + return (sc, dc, tc) def _cache_ghosts(self): stencil_ghost = self.order//2 @@ -238,18 +239,18 @@ class DirectionalStretchingKernel(KernelCodeGenerator): return self.time_integrator.stages * stencil_ghost else: return stencil_ghost + def _min_ghosts(self): return self.min_ghosts(self.boundary, self.formulation, - self.order, self.time_integrator, self.direction) - + self.order, self.time_integrator, self.direction) - def build_requirements(self,typegen,work_dim,ftype,order,is_cached,time_integrator,direction, - boundary,force_symbolic,formulation,storage,is_periodic,is_inplace): - tg=typegen + def build_requirements(self, typegen, work_dim, ftype, order, is_cached, time_integrator, direction, + boundary, force_symbolic, formulation, storage, is_periodic, is_inplace): + tg = typegen reqs = WriteOnceDict() compute_id = ComputeIndexFunction(typegen=typegen, dim=work_dim, itype='int', - wrap=is_periodic) + wrap=is_periodic) reqs['compute_id'] = compute_id vsize = upper_pow2_or_3(work_dim) @@ -260,66 +261,66 @@ class DirectionalStretchingKernel(KernelCodeGenerator): reqs['MeshInfoStruct'] = mesh_info_struct stretching_rhs = DirectionalStretchingRhsFunction(typegen=typegen, dim=work_dim, - ftype=ftype, cached=is_cached, - order=order, direction=direction, boundary=boundary, - formulation=formulation, - ptr_restrict=True, vectorize_u=False, - itype='int') + ftype=ftype, cached=is_cached, + order=order, direction=direction, boundary=boundary, + formulation=formulation, + ptr_restrict=True, vectorize_u=False, + itype='int') used_vars = RungeKuttaFunction._default_used_vars.copy() - used_vars['y']='W' - used_vars['step']='rk_step' + used_vars['y'] = 'W' + used_vars['step'] = 'rk_step' runge_kutta = RungeKuttaFunction(typegen=tg, ftype=ftype, - method=time_integrator, - rhs=stretching_rhs, - used_vars=used_vars, - known_args=None) + method=time_integrator, + rhs=stretching_rhs, + used_vars=used_vars, + known_args=None) reqs['runge_kutta'] = runge_kutta return reqs - def gen_kernel_arguments(self, typegen, work_dim, ftype, requirements,is_cached,is_inplace, - local_size_known): + def gen_kernel_arguments(self, typegen, work_dim, ftype, requirements, is_cached, is_inplace, + local_size_known): xyz = 'xyz' - svelocity = 'U' + svelocity = 'U' svorticity = 'W' kargs = ArgDict() - kargs['dt'] = CodegenVariable(ctype=ftype,name='dt',typegen=typegen, - add_impl_const=True,nl=True) + kargs['dt'] = CodegenVariable(ctype=ftype, name='dt', typegen=typegen, + add_impl_const=True, nl=True) _global = OpenClCodeGenerator.default_keywords['global'] - _local = OpenClCodeGenerator.default_keywords['local'] + _local = OpenClCodeGenerator.default_keywords['local'] for i in range(work_dim): name = svelocity+xyz[i] - kargs[name] = CodegenVariable(storage=_global,name=name,typegen=typegen, - ctype=ftype,ptr=True,ptr_restrict=True,const=True,add_impl_const=True) + kargs[name] = CodegenVariable(storage=_global, name=name, typegen=typegen, + ctype=ftype, ptr=True, ptr_restrict=True, const=True, add_impl_const=True) for i in range(work_dim): name = svorticity+xyz[i]+'_in' - kargs[name] = CodegenVariable(storage=_global,name=name,typegen=typegen, - ctype=ftype,ptr=True,ptr_restrict=(not is_inplace),const=True,add_impl_const=True) + kargs[name] = CodegenVariable(storage=_global, name=name, typegen=typegen, + ctype=ftype, ptr=True, ptr_restrict=(not is_inplace), const=True, add_impl_const=True) for i in range(work_dim): name = svorticity+xyz[i]+'_out' - kargs[name] = CodegenVariable(storage=_global,name=name,typegen=typegen, - ctype=ftype,ptr=True,ptr_restrict=(not is_inplace),const=False,add_impl_const=True) + kargs[name] = CodegenVariable(storage=_global, name=name, typegen=typegen, + ctype=ftype, ptr=True, ptr_restrict=(not is_inplace), const=False, add_impl_const=True) kargs['velocity_mesh_info'] = \ - requirements['MeshInfoStruct'].build_codegen_variable(const=True, - name='velocity_mesh_info') + requirements['MeshInfoStruct'].build_codegen_variable(const=True, + name='velocity_mesh_info') kargs['vorticity_mesh_info'] = \ - requirements['MeshInfoStruct'].build_codegen_variable(const=True, - name='vorticity_mesh_info') + requirements['MeshInfoStruct'].build_codegen_variable(const=True, + name='vorticity_mesh_info') if is_cached and not local_size_known: - kargs['buffer'] = CodegenVariable(storage=_local,ctype=ftype, - add_impl_const=True, name='buffer', ptr=True, ptr_restrict=True, - typegen=typegen, nl=False) + kargs['buffer'] = CodegenVariable(storage=_local, ctype=ftype, + add_impl_const=True, name='buffer', ptr=True, ptr_restrict=True, + typegen=typegen, nl=False) self.svorticity = svorticity - self.svelocity = svelocity + self.svelocity = svelocity self.xyz = xyz return kargs @@ -329,137 +330,136 @@ class DirectionalStretchingKernel(KernelCodeGenerator): tg = s.typegen direction = s.direction - work_dim = s.work_dim - dim = s.dim - ftype = s.ftype - boundary = s.boundary + work_dim = s.work_dim + dim = s.dim + ftype = s.ftype + boundary = s.boundary is_cached = s.is_cached - storage = s.storage + storage = s.storage symbolic_mode = s.symbolic_mode - formulation = s.formulation - is_conservative = s.is_conservative - is_periodic = s.is_periodic + formulation = s.formulation + is_conservative = s.is_conservative + is_periodic = s.is_periodic local_size_known = s.local_size_known xyz = s.xyz - vtype = tg.vtype(ftype,work_dim) + vtype = tg.vtype(ftype, work_dim) - global_id = s.vars['global_id'] - local_id = s.vars['local_id'] - group_id = s.vars['group_id'] + global_id = s.vars['global_id'] + local_id = s.vars['local_id'] + group_id = s.vars['group_id'] - global_index = s.vars['global_index'] - local_index = s.vars['local_index'] + global_index = s.vars['global_index'] + local_index = s.vars['local_index'] - global_size = s.vars['global_size'] - local_size = s.vars['local_size'] + global_size = s.vars['global_size'] + local_size = s.vars['local_size'] - dt = s.vars['dt'] - velocity_mesh_info = s.vars['velocity_mesh_info'] + dt = s.vars['dt'] + velocity_mesh_info = s.vars['velocity_mesh_info'] vorticity_mesh_info = s.vars['vorticity_mesh_info'] - grid_size = vorticity_mesh_info['local_mesh']['resolution'].view( - 'grid_size', slice(None,dim)) - compute_grid_size = vorticity_mesh_info['local_mesh']['compute_resolution'].view( - 'compute_grid_size',slice(None,dim)) + grid_size = vorticity_mesh_info['local_mesh']['resolution'].view( + 'grid_size', slice(None, dim)) + compute_grid_size = vorticity_mesh_info['local_mesh']['compute_resolution'].view( + 'compute_grid_size', slice(None, dim)) compute_grid_ghosts = vorticity_mesh_info['ghosts'].view( - 'compute_grid_ghosts', slice(0,dim), const=True) - inv_dx = vorticity_mesh_info['inv_dx'].view( - 'inv_dx', slice(0,1), const=True) + 'compute_grid_ghosts', slice(0, dim), const=True) + inv_dx = vorticity_mesh_info['inv_dx'].view( + 'inv_dx', slice(0, 1), const=True) s.update_vars(grid_size=grid_size, inv_dx=inv_dx, - compute_grid_ghosts=compute_grid_ghosts, compute_grid_size=compute_grid_size) + compute_grid_ghosts=compute_grid_ghosts, compute_grid_size=compute_grid_size) compute_index = self.reqs['compute_id'] - runge_kutta = self.reqs['runge_kutta'] + runge_kutta = self.reqs['runge_kutta'] - W = CodegenVectorClBuiltin('W',ftype,dim,tg) - U = CodegenVectorClBuiltin('U',ftype,dim,tg) + W = CodegenVectorClBuiltin('W', ftype, dim, tg) + U = CodegenVectorClBuiltin('U', ftype, dim, tg) - first = CodegenVariable('first','bool',tg,init='true') - active = CodegenVariable('active','bool',tg, const=True) + first = CodegenVariable('first', 'bool', tg, init='true') + active = CodegenVariable('active', 'bool', tg, const=True) - cache_ghosts = CodegenVariable('cache_ghosts','int',tg, - const=True,value=self._cache_ghosts()) - local_work = CodegenVariable('lwork','int',tg,const=True) + cache_ghosts = CodegenVariable('cache_ghosts', 'int', tg, + const=True, value=self._cache_ghosts()) + local_work = CodegenVariable('lwork', 'int', tg, const=True) cached_vars = ArgDict() if is_cached: for i in range(work_dim): Vi = self.svelocity+self.xyz[i] if local_size_known: - Vic = CodegenArray(name=Vi+'c',dim=1,ctype=ftype,typegen=tg, - shape=(local_size.value[0],), storage=storage) + Vic = CodegenArray(name=Vi+'c', dim=1, ctype=ftype, typegen=tg, + shape=(local_size.value[0],), storage=storage) else: buf = s.vars['buffer'] init = '{} + {}*{}'.format(buf(), i, local_size[0]) - Vic = CodegenVariable(storage=storage,name=Vi+'c',ctype=ftype,typegen=tg, - const=True, ptr_restrict=True,ptr=True,init=init) + Vic = CodegenVariable(storage=storage, name=Vi+'c', ctype=ftype, typegen=tg, + const=True, ptr_restrict=True, ptr=True, init=init) cached_vars[Vi] = Vic if is_conservative: Wi = self.svorticity+self.xyz[direction] if local_size_known: - Wic = CodegenArray(storage=storage,name=Wi+'c',dim=1,ctype=ftype, - typegen=tg,shape=(local_size.value[0],)) + Wic = CodegenArray(storage=storage, name=Wi+'c', dim=1, ctype=ftype, + typegen=tg, shape=(local_size.value[0],)) else: buf = s.vars['buffer'] init = '{} + {}*{}'.format(buf(), work_dim, local_size[0]) - Wic = CodegenVariable(storage=storage,name=Wi+'c',ctype=ftype,typegen=tg, - const=True, ptr_restrict=True,ptr=True,init=init) + Wic = CodegenVariable(storage=storage, name=Wi+'c', ctype=ftype, typegen=tg, + const=True, ptr_restrict=True, ptr=True, init=init) cached_vars[Wi] = Wic - _U = self.svelocity + _U = self.svelocity size = cache_ghosts.value - Ur = CodegenArray(storage='__local',name=_U+'r',dim=1,ctype=vtype,typegen=tg, - shape=(2*size,)) - _W = self.svorticity + Ur = CodegenArray(storage='__local', name=_U+'r', dim=1, ctype=vtype, typegen=tg, + shape=(2*size,)) + _W = self.svorticity size = cache_ghosts.value - Wr = CodegenArray(storage='__local',name=_W+'r',dim=1,ctype=vtype,typegen=tg, - shape=(2*size,)) + Wr = CodegenArray(storage='__local', name=_W+'r', dim=1, ctype=vtype, typegen=tg, + shape=(2*size,)) if is_periodic: - Ul = CodegenArray(storage='__local',name=_U+'l',dim=1,ctype=vtype,typegen=tg, - shape=(2*size,)) - Wl = CodegenArray(storage='__local',name=_W+'l',dim=1,ctype=vtype,typegen=tg, - shape=(2*size,)) - + Ul = CodegenArray(storage='__local', name=_U+'l', dim=1, ctype=vtype, typegen=tg, + shape=(2*size,)) + Wl = CodegenArray(storage='__local', name=_W+'l', dim=1, ctype=vtype, typegen=tg, + shape=(2*size,)) @contextlib.contextmanager def _work_iterate_(i): try: - if i==0: + if i == 0: fval = '0' gsize = local_work() N = '(({}+2*{}+{lwork}-1)/{lwork})*{lwork}'.format(compute_grid_size[i], - cache_ghosts(),lwork=local_work()) - ghosts = '({}-{})'.format(compute_grid_ghosts[i],cache_ghosts()) + cache_ghosts(), lwork=local_work()) + ghosts = '({}-{})'.format(compute_grid_ghosts[i], cache_ghosts()) else: fval = global_id.fval(i) gsize = global_size[i] - N = '{Sx}'.format(Sx=compute_grid_size[i]) + N = '{Sx}'.format(Sx=compute_grid_size[i]) ghosts = compute_grid_ghosts[i] s.append('#pragma unroll 4') with s._for_('int {i}={fval}; {i}<{N}; {i}+={gsize}'.format( - i='kji'[i], fval=fval, gsize=gsize,N=N)) as ctx: + i='kji'[i], fval=fval, gsize=gsize, N=N)) as ctx: - if i==0: + if i == 0: s.append('{} = {}+{};'.format(global_id[i], 'kji'[i], local_id[0])) else: s.append('{} = {}+{};'.format(global_id[i], 'kji'[i], ghosts)) - if i==0: + if i == 0: active.declare(s, init='({} < {}+2*{})'.format( global_id[0], compute_grid_size[0], cache_ghosts())) s.append('{} += {};'.format(global_id[i], ghosts)) - elif i==1: + elif i == 1: first.declare(s) yield ctx except: raise - nested_loops = [_work_iterate_(i) for i in range(dim-1,-1,-1)] + nested_loops = [_work_iterate_(i) for i in range(dim-1, -1, -1)] @contextlib.contextmanager def if_thread_active(): @@ -469,28 +469,28 @@ class DirectionalStretchingKernel(KernelCodeGenerator): with s._kernel_(): s.jumpline() with s._align_() as al: - local_id.declare(al,align=True,const=True) - global_size.declare(al,align=True,const=True) - local_size.declare(al,align=True,const=True) + local_id.declare(al, align=True, const=True) + global_size.declare(al, align=True, const=True) + local_size.declare(al, align=True, const=True) s.jumpline() with s._align_() as al: - compute_grid_size.declare(al,const=True,align=True) - compute_grid_ghosts.declare(al,align=True) - grid_size.declare(al,align=True,const=True) - inv_dx.declare(al,align=True) + compute_grid_size.declare(al, const=True, align=True) + compute_grid_ghosts.declare(al, align=True) + grid_size.declare(al, align=True, const=True) + inv_dx.declare(al, align=True) s.jumpline() with s._align_() as al: - cache_ghosts.declare(al,align=True) - local_work.declare(al,align=True, - init='{} - 2*{}'.format(local_size[0],cache_ghosts())) + cache_ghosts.declare(al, align=True) + local_work.declare(al, align=True, + init='{} - 2*{}'.format(local_size[0], cache_ghosts())) s.jumpline() if is_cached: with s._align_() as al: - for varname,var in cached_vars.items(): - var.declare(al,align=True) + for varname, var in cached_vars.items(): + var.declare(al, align=True) s.jumpline() Ur.declare(s) @@ -504,7 +504,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): s.jumpline() - global_id.declare(s,init=False) + global_id.declare(s, init=False) s.jumpline() with nested(*nested_loops): @@ -512,19 +512,18 @@ class DirectionalStretchingKernel(KernelCodeGenerator): init = compute_index(idx=global_id, size=grid_size) global_index.declare(s, init=init, const=True) - winit, uinit = '','' + winit, uinit = '', '' for i in range(work_dim): - Wi_in = self.svorticity+self.xyz[i]+'_in' + Wi_in = self.svorticity+self.xyz[i]+'_in' Wi_out = self.svorticity+self.xyz[i]+'_out' Ui = self.svelocity+self.xyz[i] uinit += self.args[Ui][global_index()] + ',' winit += self.args[Wi_in][global_index()] + ',' - uinit='({}{})({})'.format(ftype, work_dim, uinit[:-1]) - winit='({}{})({})'.format(ftype, work_dim, winit[:-1]) - + uinit = '({}{})({})'.format(ftype, work_dim, uinit[:-1]) + winit = '({}{})({})'.format(ftype, work_dim, winit[:-1]) s.jumpline() - s.append('{} {},{};'.format(U.ctype,U(),W())) + s.append('{} {},{};'.format(U.ctype, U(), W())) with if_thread_active(): with s._if_('{}'.format(first())): s.append('{} = {};'.format(U(), uinit)) @@ -532,87 +531,82 @@ class DirectionalStretchingKernel(KernelCodeGenerator): s.append('{} = false;'.format(first())) if is_periodic: with s._if_('{lid} < 2*{ghosts}'.format(lid=local_id[0], - ghosts=cache_ghosts())): + ghosts=cache_ghosts())): s.append('{} = {};'.format(Ul[local_id[0]], U())) s.append('{} = {};'.format(Wl[local_id[0]], W())) with s._else_(): if is_periodic: with s._if_('{} >= {}-{}'.format(global_id[0], - compute_grid_size[0],cache_ghosts())): + compute_grid_size[0], cache_ghosts())): _id = '{}-{}+{}'.format(global_id[0], - compute_grid_size[0],cache_ghosts()) + compute_grid_size[0], cache_ghosts()) s.append('{} = {};'.format(U(), Ul[_id])) s.append('{} = {};'.format(W(), Wl[_id])) - with s._elif_('{} < 2*{}'.format(local_id[0],cache_ghosts())): + with s._elif_('{} < 2*{}'.format(local_id[0], cache_ghosts())): s.append('{} = {};'.format(U(), Ur[local_id[0]])) s.append('{} = {};'.format(W(), Wr[local_id[0]])) else: - with s._if_('{} < 2*{}'.format(local_id[0],cache_ghosts())): + with s._if_('{} < 2*{}'.format(local_id[0], cache_ghosts())): s.append('{} = {};'.format(U(), Ur[local_id[0]])) s.append('{} = {};'.format(W(), Wr[local_id[0]])) with s._else_(): s.append('{} = {};'.format(U(), uinit)) s.append('{} = {};'.format(W(), winit)) - s.barrier(_local=True) s.jumpline() - with if_thread_active(): if self.is_cached: for i in range(work_dim): Ui = self.svelocity+self.xyz[i] Uic = cached_vars[Ui] - code = '{} = {};'.format(Uic[local_id[0]],U[i]) + code = '{} = {};'.format(Uic[local_id[0]], U[i]) s.append(code) s.jumpline() - - with s._if_('{} >= {}-2*{}'.format(local_id[0],local_size[0], - cache_ghosts())): - _id = '{}-{}+2*{}'.format(local_id[0],local_size[0],cache_ghosts()) + with s._if_('{} >= {}-2*{}'.format(local_id[0], local_size[0], + cache_ghosts())): + _id = '{}-{}+2*{}'.format(local_id[0], local_size[0], cache_ghosts()) s.append('{} = {};'.format(Ur[_id], U())) s.append('{} = {};'.format(Wr[_id], W())) - s.barrier(_local=True) s.jumpline() - - rk_args={'dt': dt, - 'inv_dx': inv_dx, - 'W': W, - 'active': active, - 'Lx' : local_size[0], - 'lidx' : local_id[0]} + rk_args = {'dt': dt, + 'inv_dx': inv_dx, + 'W': W, + 'active': active, + 'Lx': local_size[0], + 'lidx': local_id[0]} if is_periodic and (not is_cached): - base = CodegenVariable('base','int',typegen=tg,const=True) - base.declare(s,init='({}/{}) * {}'.format(global_index(),grid_size[0], - grid_size[0])) - offset = CodegenVariable('offset','int',typegen=tg,const=True) - offset.declare(s,init='{}-{}'.format(global_index(),base())) - rk_args['base'] = base + base = CodegenVariable('base', 'int', typegen=tg, const=True) + base.declare(s, init='({}/{}) * {}'.format(global_index(), grid_size[0], + grid_size[0])) + offset = CodegenVariable('offset', 'int', typegen=tg, const=True) + offset.declare(s, init='{}-{}'.format(global_index(), base())) + rk_args['base'] = base rk_args['offset'] = offset - rk_args['width'] = grid_size[0] + rk_args['width'] = grid_size[0] else: rk_args['offset'] = local_id[0] if is_cached else global_index for i in range(work_dim): - Ui_name = self.svelocity+xyz[i] + Ui_name = self.svelocity+xyz[i] if is_cached: Ui = cached_vars[Ui_name] else: - Ui = s.vars[Ui_name] + Ui = s.vars[Ui_name] rk_args[Ui_name] = Ui if is_conservative: - Wd_name = self.svorticity+xyz[direction] + Wd_name = self.svorticity+xyz[direction] if is_cached: Wd = cached_vars[Wd_name] else: - Wd = s.vars[Wd_name] + Wd = s.vars[Wd_name] rk_args[Wd_name] = Wd call = runge_kutta(**rk_args) @@ -624,332 +618,329 @@ class DirectionalStretchingKernel(KernelCodeGenerator): if is_periodic: cond = '({lid}>={ghosts}) && ({lid}<{L}-{ghosts}) && ({gidx}<{size})' cond = cond.format( - lid=local_id[0], ghosts=cache_ghosts(), L=local_size[0], - gidx=global_id[0], size=compute_grid_size[0]) + lid=local_id[0], ghosts=cache_ghosts(), L=local_size[0], + gidx=global_id[0], size=compute_grid_size[0]) else: cond = '({lid}>={ghosts}) && ({lid}<{L}-{ghosts})'.format( - lid=local_id[0], ghosts=cache_ghosts(), L=local_size[0]) + lid=local_id[0], ghosts=cache_ghosts(), L=local_size[0]) with s._if_(cond): for i in range(work_dim): Wi_out = self.svorticity+self.xyz[i]+'_out' Wi_out = s.vars[Wi_out] - code='{} = {};'.format(Wi_out[global_index()], W[i]) + code = '{} = {};'.format(Wi_out[global_index()], W[i]) s.append(code) - def per_work_statistics(self): - tg = self.typegen - dim = self.dim - ftype = self.ftype + tg = self.typegen + dim = self.dim + ftype = self.ftype cached = self.is_cached - compute_id = self.reqs['compute_id'] + compute_id = self.reqs['compute_id'] runge_kutta = self.reqs['runge_kutta'] - stats = compute_id.per_work_statistics() + stats = compute_id.per_work_statistics() stats += runge_kutta.per_work_statistics() size = tg.FLT_BYTES[ftype] - stats.global_mem_byte_reads += dim*size + stats.global_mem_byte_reads += dim*size stats.global_mem_byte_writes += dim*size return stats if False: @staticmethod - def autotune(cl_env, typegen, build_options,autotuner_config, - direction, time_integrator, formulation, discretization, - velocity, vorticity_in, vorticity_out, - velocity_mesh_info, vorticity_mesh_info): - - dir = direction - - if not isinstance(cl_env,OpenClEnvironment): - raise ValueError('cl_env is not an OpenClEnvironment.') - if not isinstance(typegen,OpenClTypeGen): - raise ValueError('typegen is not an OpenClTypeGen.') - - precision = typegen.dtype - ftype = typegen.fbtype - - device = cl_env.device - context = cl_env.context - platform = cl_env.platform - queue = cl_env.default_queue - - if context is None: - raise ValueError('context cannot be None.') - if device is None: - raise ValueError('device cannot be None.') - if platform is None: - raise ValueError('platform cannot be None.') - if queue is None: - raise ValueError('queue cannot be None.') - if typegen.platform!=platform or typegen.device!=device: - raise ValueError('platform or device mismatch.') - - if not isinstance(velocity,DiscreteScalarFieldView) or \ - velocity.backend.kind != Backend.OPENCL: - raise ValueError('velocity is not a DiscreteScalarFieldView of kind OpenCL.') - if not isinstance(vorticity_in,DiscreteScalarFieldView) or \ - velocity.backend.kind != Backend.OPENCL: - raise ValueError('vorticity_in is not a DiscreteScalarFieldView of kind OpenCL.') - if not isinstance(vorticity_out,DiscreteScalarFieldView) or \ - velocity.backend.kind != Backend.OPENCL: - raise ValueError('vorticity_out is not a DiscreteScalarFieldView of kind OpenCL') - - dim = velocity.nb_components - if dim != 3: - raise ValueError('Stretching only appears in 3D...') - if (direction>=dim): - raise ValueError('direction >= dim.') - if (velocity.nb_components != dim) or \ - (vorticity_in.nb_components != dim) or \ - (vorticity_out.nb_components != dim): - raise ValueError('Vector components mismatch with dim {}.'.format(dim)) - - if not isinstance(time_integrator, ExplicitRungeKutta): - msg = 'Given time integrator is not an instance of ExplicitRungeKutta, ' - msg+='got a {}.'.format(time_integrator.__class__) - raise TypeError(msg) - - if not isinstance(formulation, StretchingFormulation): - msg = 'Unknown stretching formulation of type \'{}\', valid ones are {}.' - msg=msg.format(formulation.__class__, formulation.svalues()) - raise TypeError(msg) - - if not isinstance(discretization, SpaceDiscretization): - msg='Discretization parameter is not an instance of SpaceDiscretization, ' - msg+='but a {}.'.format(discretization.__class__) - raise TypeError(msg) - elif discretization == SpaceDiscretization.FDC2: - order=2 - elif discretization == SpaceDiscretization.FDC4: - order=4 - elif discretization == SpaceDiscretization.FDC6: - order=6 - elif discretization == SpaceDiscretization.FDC8: - order=8 - else: - msg='Unknown discretization {}.'.format(discretization) - raise ValueError(msg) - if order%2 != 0: - raise ValueError('order must be even.') - - if (not isinstance(velocity_mesh_info, CodegenStruct)) or \ - (not velocity_mesh_info.known()): - msg='velocity_mesh_info is not a known MeshInfoStruct codegen variable.' - raise ValueError(msg) - if (not isinstance(vorticity_mesh_info, CodegenStruct)) or \ - (not vorticity_mesh_info.known()): - msg='vorticity_mesh_info is not a known MeshInfoStruct codegen variable.' - raise ValueError(msg) - - v_resolution = velocity_mesh_info['local_mesh']['compute_resolution'].value[:dim] - v_lboundary = velocity_mesh_info['local_mesh']['lboundary'].value[:dim] - v_rboundary = velocity_mesh_info['local_mesh']['rboundary'].value[:dim] - v_ghosts = velocity_mesh_info['ghosts'].value[:dim] - v_dx = velocity_mesh_info['dx'].value[:dim] - - w_resolution = vorticity_mesh_info['local_mesh']['compute_resolution'].value[:dim] - w_lboundary = vorticity_mesh_info['local_mesh']['lboundary'].value[:dim] - w_rboundary = vorticity_mesh_info['local_mesh']['rboundary'].value[:dim] - w_ghosts = vorticity_mesh_info['ghosts'].value[:dim] - w_dx = vorticity_mesh_info['dx'].value[:dim] - - is_multi_scale = (v_resolution != w_resolution).any() - is_inplace = (vorticity_in.data[0].data == vorticity_out.data[0].data) \ - or (vorticity_in.data[1].data == vorticity_out.data[1].data) \ - or (vorticity_in.data[2].data == vorticity_out.data[2].data) - - - w_boundary = (w_lboundary[dir], w_rboundary[dir]) - v_boundary = (v_lboundary[dir], v_rboundary[dir]) - if (v_boundary != w_boundary): - msg = 'Boundaries mismatch:\n *velocity: {}\n *vorticity: {}\n' - msg=msg.formulation(v_boundary, w_boundary) - raise ValueError(boundary) - boundary = v_boundary - - (min_v_ghosts, min_w_ghosts) = DirectionalStretchingKernel.min_ghosts( - boundary, formulation, order, time_integrator, direction) - - assert (min_v_ghosts>=0).all() - assert (min_w_ghosts>=0).all() - - if (v_ghosts < min_v_ghosts).any(): - msg= 'Given boundary condition implies minimum ghosts numbers to be at least {} ' - msg+='in current direction for velocity but only {} ghosts ' - msg+='are present in the grid.' - msg=msg.format(min_v_ghosts, v_ghosts) - raise RuntimeError(msg) - - if (w_ghosts < min_w_ghosts).any(): - msg= 'Given boundary condition implies minimum ghosts numbers to be at least {} ' - msg+='in current direction for position but only {} ghosts ' - msg+='are present in the grid.' - msg=msg.format(min_w_ghosts, w_ghosts) - raise RuntimeError(msg) - - if is_multi_scale: - msg='Compute_resolution mismatch between velocity and vorticity, ' - msg+='got {} and {} and multiscale has not been implemented yet.' - msg=msg.format(v_resolution,w_resolution) - raise RuntimeError(msg) - - min_wg_size = DirectionalStretchingKernel.min_wg_size(formulation, order, - time_integrator) - - # work size is the resolution without ghosts - compute_resolution = w_resolution - work_size = np.ones(3,dtype=np.int32) - work_size[:dim] = compute_resolution - - # autotuner parameters - - dump_src = __KERNEL_DEBUG__ - symbolic_mode = False #__KERNEL_DEBUG__ - - min_local_size = np.maximum( min_wg_size, - [clCharacterize.get_simd_group_size(device,1),1,1]) - - caching_options = [True] - if formulation != StretchingFormulation.CONSERVATIVE: - caching_options.append(False) - - autotuner_flag = autotuner_config.autotuner_flag - if (autotuner_flag == AutotunerFlags.ESTIMATE): - max_workitem_workload = (1,1,1) - elif (autotuner_flag == AutotunerFlags.MEASURE): - max_workitem_workload = (1,1,8) - elif (autotuner_flag == AutotunerFlags.PATIENT): - max_workitem_workload = (1,8,8) - elif (autotuner_flag == AutotunerFlags.EXHAUSTIVE): - max_workitem_workload = (1,16,16) - - ## kernel generator - def kernel_generator(work_size, work_load, local_work_size, - kernel_args, - extra_parameters, - force_verbose = False, - force_debug = False, - return_codegen = False, - **kargs): - - ## Compile time known variables + def autotune(cl_env, typegen, build_options, autotuner_config, + direction, time_integrator, formulation, discretization, + velocity, vorticity_in, vorticity_out, + velocity_mesh_info, vorticity_mesh_info): + + dir = direction + + if not isinstance(cl_env, OpenClEnvironment): + raise ValueError('cl_env is not an OpenClEnvironment.') + if not isinstance(typegen, OpenClTypeGen): + raise ValueError('typegen is not an OpenClTypeGen.') + + precision = typegen.dtype + ftype = typegen.fbtype + + device = cl_env.device + context = cl_env.context + platform = cl_env.platform + queue = cl_env.default_queue + + if context is None: + raise ValueError('context cannot be None.') + if device is None: + raise ValueError('device cannot be None.') + if platform is None: + raise ValueError('platform cannot be None.') + if queue is None: + raise ValueError('queue cannot be None.') + if typegen.platform != platform or typegen.device != device: + raise ValueError('platform or device mismatch.') + + if not isinstance(velocity, DiscreteScalarFieldView) or \ + velocity.backend.kind != Backend.OPENCL: + raise ValueError('velocity is not a DiscreteScalarFieldView of kind OpenCL.') + if not isinstance(vorticity_in, DiscreteScalarFieldView) or \ + velocity.backend.kind != Backend.OPENCL: + raise ValueError('vorticity_in is not a DiscreteScalarFieldView of kind OpenCL.') + if not isinstance(vorticity_out, DiscreteScalarFieldView) or \ + velocity.backend.kind != Backend.OPENCL: + raise ValueError('vorticity_out is not a DiscreteScalarFieldView of kind OpenCL') + + dim = velocity.nb_components + if dim != 3: + raise ValueError('Stretching only appears in 3D...') + if (direction >= dim): + raise ValueError('direction >= dim.') + if (velocity.nb_components != dim) or \ + (vorticity_in.nb_components != dim) or \ + (vorticity_out.nb_components != dim): + raise ValueError('Vector components mismatch with dim {}.'.format(dim)) + + if not isinstance(time_integrator, ExplicitRungeKutta): + msg = 'Given time integrator is not an instance of ExplicitRungeKutta, ' + msg += 'got a {}.'.format(time_integrator.__class__) + raise TypeError(msg) + + if not isinstance(formulation, StretchingFormulation): + msg = 'Unknown stretching formulation of type \'{}\', valid ones are {}.' + msg = msg.format(formulation.__class__, formulation.svalues()) + raise TypeError(msg) + + if not isinstance(discretization, SpaceDiscretization): + msg = 'Discretization parameter is not an instance of SpaceDiscretization, ' + msg += 'but a {}.'.format(discretization.__class__) + raise TypeError(msg) + elif discretization == SpaceDiscretization.FDC2: + order = 2 + elif discretization == SpaceDiscretization.FDC4: + order = 4 + elif discretization == SpaceDiscretization.FDC6: + order = 6 + elif discretization == SpaceDiscretization.FDC8: + order = 8 + else: + msg = 'Unknown discretization {}.'.format(discretization) + raise ValueError(msg) + if order % 2 != 0: + raise ValueError('order must be even.') + + if (not isinstance(velocity_mesh_info, CodegenStruct)) or \ + (not velocity_mesh_info.known()): + msg = 'velocity_mesh_info is not a known MeshInfoStruct codegen variable.' + raise ValueError(msg) + if (not isinstance(vorticity_mesh_info, CodegenStruct)) or \ + (not vorticity_mesh_info.known()): + msg = 'vorticity_mesh_info is not a known MeshInfoStruct codegen variable.' + raise ValueError(msg) + + v_resolution = velocity_mesh_info['local_mesh']['compute_resolution'].value[:dim] + v_lboundary = velocity_mesh_info['local_mesh']['lboundary'].value[:dim] + v_rboundary = velocity_mesh_info['local_mesh']['rboundary'].value[:dim] + v_ghosts = velocity_mesh_info['ghosts'].value[:dim] + v_dx = velocity_mesh_info['dx'].value[:dim] + + w_resolution = vorticity_mesh_info['local_mesh']['compute_resolution'].value[:dim] + w_lboundary = vorticity_mesh_info['local_mesh']['lboundary'].value[:dim] + w_rboundary = vorticity_mesh_info['local_mesh']['rboundary'].value[:dim] + w_ghosts = vorticity_mesh_info['ghosts'].value[:dim] + w_dx = vorticity_mesh_info['dx'].value[:dim] + + is_multi_scale = (v_resolution != w_resolution).any() + is_inplace = (vorticity_in.data[0].data == vorticity_out.data[0].data) \ + or (vorticity_in.data[1].data == vorticity_out.data[1].data) \ + or (vorticity_in.data[2].data == vorticity_out.data[2].data) + + w_boundary = (w_lboundary[dir], w_rboundary[dir]) + v_boundary = (v_lboundary[dir], v_rboundary[dir]) + if (v_boundary != w_boundary): + msg = 'Boundaries mismatch:\n *velocity: {}\n *vorticity: {}\n' + msg = msg.formulation(v_boundary, w_boundary) + raise ValueError(boundary) + boundary = v_boundary + + (min_v_ghosts, min_w_ghosts) = DirectionalStretchingKernel.min_ghosts( + boundary, formulation, order, time_integrator, direction) + + assert (min_v_ghosts >= 0).all() + assert (min_w_ghosts >= 0).all() + + if (v_ghosts < min_v_ghosts).any(): + msg = 'Given boundary condition implies minimum ghosts numbers to be at least {} ' + msg += 'in current direction for velocity but only {} ghosts ' + msg += 'are present in the grid.' + msg = msg.format(min_v_ghosts, v_ghosts) + raise RuntimeError(msg) + + if (w_ghosts < min_w_ghosts).any(): + msg = 'Given boundary condition implies minimum ghosts numbers to be at least {} ' + msg += 'in current direction for position but only {} ghosts ' + msg += 'are present in the grid.' + msg = msg.format(min_w_ghosts, w_ghosts) + raise RuntimeError(msg) + + if is_multi_scale: + msg = 'Compute_resolution mismatch between velocity and vorticity, ' + msg += 'got {} and {} and multiscale has not been implemented yet.' + msg = msg.format(v_resolution, w_resolution) + raise RuntimeError(msg) + + min_wg_size = DirectionalStretchingKernel.min_wg_size(formulation, order, + time_integrator) + + # work size is the resolution without ghosts + compute_resolution = w_resolution + work_size = np.ones(3, dtype=np.int32) + work_size[:dim] = compute_resolution + + # autotuner parameters + + dump_src = __KERNEL_DEBUG__ + symbolic_mode = False # __KERNEL_DEBUG__ + + min_local_size = np.maximum(min_wg_size, + [clCharacterize.get_simd_group_size(device, 1), 1, 1]) + + caching_options = [True] + if formulation != StretchingFormulation.CONSERVATIVE: + caching_options.append(False) + + autotuner_flag = autotuner_config.autotuner_flag + if (autotuner_flag == AutotunerFlags.ESTIMATE): + max_workitem_workload = (1, 1, 1) + elif (autotuner_flag == AutotunerFlags.MEASURE): + max_workitem_workload = (1, 1, 8) + elif (autotuner_flag == AutotunerFlags.PATIENT): + max_workitem_workload = (1, 8, 8) + elif (autotuner_flag == AutotunerFlags.EXHAUSTIVE): + max_workitem_workload = (1, 16, 16) + + # kernel generator + def kernel_generator(work_size, work_load, local_work_size, + kernel_args, + extra_parameters, + force_verbose=False, + force_debug=False, + return_codegen=False, + **kargs): + + # Compile time known variables # dt is not known because it depends on splitting direction # and simulation current time_step - known_vars = dict( - velocity_mesh_info=velocity_mesh_info, - vorticity_mesh_info=vorticity_mesh_info, - local_size=local_work_size[:dim] - ) - - ## CodeGenerator - cached=True - codegen = DirectionalStretchingKernel(typegen=typegen, - order=order, dim=dim, direction=direction, boundary=boundary, - formulation=formulation, time_integrator=time_integrator, - is_inplace=is_inplace, - symbolic_mode=symbolic_mode, ftype=ftype, - known_vars=known_vars, **extra_parameters) - - global_size = codegen.get_global_size(work_size=work_size, - work_load=work_load, local_work_size=local_work_size) - - usable_cache_bytes_per_wg = clCharacterize.usable_local_mem_size(device) - if codegen.required_workgroup_cache_size(local_work_size[:dim])[2] > \ - usable_cache_bytes_per_wg: - raise KernelGenerationError('Insufficient device cache.') - - ## generate source code and build kernel - src = codegen.__str__() - src_hash = hashlib.sha512(src).hexdigest() - prg = cl_env.build_raw_src(src, build_options, - kernel_name=codegen.name, - force_verbose=force_verbose, force_debug=force_debug) - kernel = prg.all_kernels()[0] - - if return_codegen: - return (codegen, kernel, kernel_args, src_hash, global_size) - else: - return (kernel, kernel_args, src_hash, global_size) + known_vars = dict( + velocity_mesh_info=velocity_mesh_info, + vorticity_mesh_info=vorticity_mesh_info, + local_size=local_work_size[:dim] + ) + + # CodeGenerator + cached = True + codegen = DirectionalStretchingKernel(typegen=typegen, + order=order, dim=dim, direction=direction, boundary=boundary, + formulation=formulation, time_integrator=time_integrator, + is_inplace=is_inplace, + symbolic_mode=symbolic_mode, ftype=ftype, + known_vars=known_vars, **extra_parameters) + + global_size = codegen.get_global_size(work_size=work_size, + work_load=work_load, local_work_size=local_work_size) + + usable_cache_bytes_per_wg = clCharacterize.usable_local_mem_size(device) + if codegen.required_workgroup_cache_size(local_work_size[:dim])[2] > \ + usable_cache_bytes_per_wg: + raise KernelGenerationError('Insufficient device cache.') + + # generate source code and build kernel + src = codegen.__str__() + src_hash = hashlib.sha512(src).hexdigest() + prg = cl_env.build_raw_src(src, build_options, + kernel_name=codegen.name, + force_verbose=force_verbose, force_debug=force_debug) + kernel = prg.all_kernels()[0] + + if return_codegen: + return (codegen, kernel, kernel_args, src_hash, global_size) + else: + return (kernel, kernel_args, src_hash, global_size) - ## Kernel Autotuner - name = DirectionalStretchingKernel.codegen_name(ftype, False, is_inplace, - direction, formulation) + # Kernel Autotuner + name = DirectionalStretchingKernel.codegen_name(ftype, False, is_inplace, + direction, formulation) - autotuner = KernelAutotuner(name=name, work_dim=dim, local_work_dim=1, - build_opts=build_options, autotuner_config=autotuner_config) - autotuner.add_filter('1d_shape_min', autotuner.min_workitems_per_direction) - autotuner.register_extra_parameter('is_cached', caching_options) - autotuner.enable_variable_workitem_workload( - max_workitem_workload=max_workitem_workload) + autotuner = KernelAutotuner(name=name, work_dim=dim, local_work_dim=1, + build_opts=build_options, autotuner_config=autotuner_config) + autotuner.add_filter('1d_shape_min', autotuner.min_workitems_per_direction) + autotuner.register_extra_parameter('is_cached', caching_options) + autotuner.enable_variable_workitem_workload( + max_workitem_workload=max_workitem_workload) - dt=1.0 - kernel_args = [precision(dt)] - kernel_args += velocity.buffers + vorticity_in.buffers+ vorticity_out.buffers + dt = 1.0 + kernel_args = [precision(dt)] + kernel_args += velocity.buffers + vorticity_in.buffers + vorticity_out.buffers - kernel_args_mapping = { - 'dt': (0,precision), - 'velocity': (slice(1+0*dim, 1+1*dim, 1), cl.MemoryObjectHolder), - 'vorticity_in': (slice(1+1*dim, 1+2*dim, 1), cl.MemoryObjectHolder), - 'vorticity_out': (slice(1+2*dim, 1+3*dim, 1), cl.MemoryObjectHolder), - } + kernel_args_mapping = { + 'dt': (0, precision), + 'velocity': (slice(1+0*dim, 1+1*dim, 1), cl.MemoryObjectHolder), + 'vorticity_in': (slice(1+1*dim, 1+2*dim, 1), cl.MemoryObjectHolder), + 'vorticity_out': (slice(1+2*dim, 1+3*dim, 1), cl.MemoryObjectHolder), + } - (gwi, lwi, stats, work_load, extra_params) = autotuner.bench(typegen=typegen, - work_size=work_size, kernel_args=kernel_args, - kernel_generator=kernel_generator, - dump_src=dump_src, - min_local_size=min_local_size, - get_max_global_size=DirectionalStretchingKernel.get_max_global_size) + (gwi, lwi, stats, work_load, extra_params) = autotuner.bench(typegen=typegen, + work_size=work_size, kernel_args=kernel_args, + kernel_generator=kernel_generator, + dump_src=dump_src, + min_local_size=min_local_size, + get_max_global_size=DirectionalStretchingKernel.get_max_global_size) - (codegen, kernel, kernel_args, src_hash, global_size) = kernel_generator( - work_size=work_size, work_load=work_load, - local_work_size=lwi, kernel_args=kernel_args, - extra_parameters=extra_params, - force_verbose=False,force_debug=False, - return_codegen=True) + (codegen, kernel, kernel_args, src_hash, global_size) = kernel_generator( + work_size=work_size, work_load=work_load, + local_work_size=lwi, kernel_args=kernel_args, + extra_parameters=extra_params, + force_verbose=False, force_debug=False, + return_codegen=True) - kernel_launcher = None#OpenClKernelLauncher(kernel, queue, list(gwi), list(lwi)) + kernel_launcher = None # OpenClKernelLauncher(kernel, queue, list(gwi), list(lwi)) - total_work = work_size[0]*work_size[1]*work_size[2] - per_work_statistics = codegen.per_work_statistics() + total_work = work_size[0]*work_size[1]*work_size[2] + per_work_statistics = codegen.per_work_statistics() - cache_info = codegen.required_workgroup_cache_size(lwi) + cache_info = codegen.required_workgroup_cache_size(lwi) - return (kernel_launcher, kernel_args, kernel_args_mapping, - total_work, per_work_statistics, cache_info) + return (kernel_launcher, kernel_args, kernel_args_mapping, + total_work, per_work_statistics, cache_info) if __name__ == '__main__': from hysop.backend.device.opencl import cl from hysop.backend.device.codegen.base.test import _test_mesh_info, _test_typegen - dim=3 - ghosts=(0,0,0) - v_resolution=(256,128,64) - w_resolution=(1024,512,256) - local_size = (1024,1,1) + dim = 3 + ghosts = (0, 0, 0) + v_resolution = (256, 128, 64) + w_resolution = (1024, 512, 256) + local_size = (1024, 1, 1) tg = _test_typegen('float') - (_,w_mesh_info) = _test_mesh_info('vorticity_mesh_info',tg,dim,ghosts,w_resolution) - (_,v_mesh_info) = _test_mesh_info('velocity_mesh_info',tg,dim,ghosts,v_resolution) + (_, w_mesh_info) = _test_mesh_info('vorticity_mesh_info', tg, dim, ghosts, w_resolution) + (_, v_mesh_info) = _test_mesh_info('velocity_mesh_info', tg, dim, ghosts, v_resolution) dsk = DirectionalStretchingKernel(typegen=tg, ftype=tg.fbtype, - order=4, dim=dim, direction=0, - formulation=StretchingFormulation.CONSERVATIVE, - time_integrator=ExplicitRungeKutta('RK4'), - is_cached=True, - is_inplace=True, - symbolic_mode=True, - boundary=(BoundaryCondition.NONE,BoundaryCondition.NONE), - known_vars=dict( - velocity_mesh_info=v_mesh_info, - vorticity_mesh_info=w_mesh_info, - local_size=local_size[:dim] - ) - ) + order=4, dim=dim, direction=0, + formulation=StretchingFormulation.CONSERVATIVE, + time_integrator=ExplicitRungeKutta('RK4'), + is_cached=True, + is_inplace=True, + symbolic_mode=True, + boundary=(BoundaryCondition.NONE, BoundaryCondition.NONE), + known_vars=dict( + velocity_mesh_info=v_mesh_info, + vorticity_mesh_info=w_mesh_info, + local_size=local_size[:dim] + ) + ) dsk.edit() dsk.test_compile() - diff --git a/hysop/backend/device/codegen/kernels/transpose.py b/hysop/backend/device/codegen/kernels/transpose.py index 62b6b5277..0a1196dc8 100644 --- a/hysop/backend/device/codegen/kernels/transpose.py +++ b/hysop/backend/device/codegen/kernels/transpose.py @@ -14,53 +14,54 @@ from hysop.backend.device.opencl import clCharacterize from hysop.backend.device.opencl.opencl_types import OpenClTypeGen from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.device.codegen.base.kernel_codegen import KernelCodeGenerator -from hysop.backend.device.codegen.base.variables import CodegenVariable, \ - CodegenVectorClBuiltin, CodegenArray, ctype_to_dtype -from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict +from hysop.backend.device.codegen.base.variables import CodegenVariable, \ + CodegenVectorClBuiltin, CodegenArray, ctype_to_dtype +from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict from hysop.backend.device.codegen.functions.compute_index import ComputeIndexFunction + class TransposeKernelGenerator(KernelCodeGenerator): n_dbg_arrays = 2 @staticmethod def codegen_name(is_inplace, axes, ctype, - tile_size, tile_padding, vectorization, - use_diagonal_coordinates): + tile_size, tile_padding, vectorization, + use_diagonal_coordinates): pdim = len(axes) - axes = [ str(j) if i!=j else 'X' for i,j in enumerate(axes) ] + axes = [str(j) if i != j else 'X' for i, j in enumerate(axes)] return 'transpose{}_{}_{}_{}d__N{}__T{}__P{}__{}'.format( - '_dc' if use_diagonal_coordinates else '_nc', - 'inplace' if is_inplace else 'out_of_place', - ctype.replace(' ','_'), pdim, vectorization, tile_size, tile_padding, - '_'.join(axes)) + '_dc' if use_diagonal_coordinates else '_nc', + 'inplace' if is_inplace else 'out_of_place', + ctype.replace(' ', '_'), pdim, vectorization, tile_size, tile_padding, + '_'.join(axes)) @classmethod def characterize_permutation(cls, shape, axes, max_device_workdim): pdim = len(axes) - assert pdim>=2 - assert set(axes)==set(range(pdim)) + assert pdim >= 2 + assert set(axes) == set(range(pdim)) contiguous_permutation = (axes[-1] != (pdim-1)) if contiguous_permutation: tile_indexes = (pdim-1, axes[-1]) else: tile_indexes = (pdim-1,) - tile_indexes = list( pdim-1-idx for idx in tile_indexes) + tile_indexes = list(pdim-1-idx for idx in tile_indexes) wdim = min(len(axes), max_device_workdim) extra_work_indexes = [] work_shape = np.empty(shape=(wdim,), dtype=np.int32) - j=0 + j = 0 for i, Si in enumerate(shape): - if i==0: + if i == 0: work_shape[0] = Si elif i in tile_indexes: work_shape[j] = Si - elif i < (wdim - int(contiguous_permutation and tile_indexes[1]>wdim-1)): + elif i < (wdim - int(contiguous_permutation and tile_indexes[1] > wdim-1)): work_shape[j] = Si else: continue - j+=1 + j += 1 return (contiguous_permutation, wdim, work_shape, tile_indexes) @classmethod @@ -71,31 +72,31 @@ class TransposeKernelGenerator(KernelCodeGenerator): tile_indexes = (pdim-1, axes[-1]) else: tile_indexes = (pdim-1,) - tile_indexes = tuple( pdim-1-idx for idx in tile_indexes ) + tile_indexes = tuple(pdim-1-idx for idx in tile_indexes) assert work_dim <= pdim, 'workdim to big.' assert work_dim >= (1 + int(contiguous_permutation)), 'workdim to small.' wdim = work_dim max_local_worksize = np.empty(shape=(wdim,), dtype=np.int32) - j=0 - for i,Si in enumerate(shape): - if i==0: + j = 0 + for i, Si in enumerate(shape): + if i == 0: max_local_worksize[j] = (tile_size+vectorization-1) // vectorization elif i in tile_indexes: max_local_worksize[j] = tile_size - elif i < (wdim - int(contiguous_permutation and tile_indexes[1]>wdim-1)): + elif i < (wdim - int(contiguous_permutation and tile_indexes[1] > wdim-1)): max_local_worksize[j] = 1 else: continue - j+=1 - assert j==wdim, '{} != {}'.format(j, wdim) + j += 1 + assert j == wdim, '{} != {}'.format(j, wdim) return max_local_worksize @classmethod def compute_global_size(cls, shape, tile_size, - vectorization, axes, - local_work_size, work_load): + vectorization, axes, + local_work_size, work_load): pdim = len(axes) contiguous_permutation = (axes[-1] != (pdim-1)) @@ -103,7 +104,7 @@ class TransposeKernelGenerator(KernelCodeGenerator): tile_indexes = (pdim-1, axes[-1]) else: tile_indexes = (pdim-1,) - tile_indexes = tuple( pdim-1-idx for idx in tile_indexes ) + tile_indexes = tuple(pdim-1-idx for idx in tile_indexes) wdim = len(local_work_size) assert wdim <= pdim, 'workdim to big.' @@ -111,22 +112,22 @@ class TransposeKernelGenerator(KernelCodeGenerator): ngroups = np.empty(shape=(wdim,), dtype=np.int32) vts = tile_size * vectorization - ts = tile_size - j=0 - for i,Si in enumerate(shape): - if i==0: + ts = tile_size + j = 0 + for i, Si in enumerate(shape): + if i == 0: wl = work_load[j] - ngroups[j] = (Si+vts*wl-1)//(vts*wl) + ngroups[j] = (Si+vts*wl-1)//(vts*wl) elif i in tile_indexes: wl = work_load[j] ngroups[j] = ((Si+ts*wl-1)//(ts*wl)) - elif i < (wdim - int(contiguous_permutation and tile_indexes[1]>wdim-1)): + elif i < (wdim - int(contiguous_permutation and tile_indexes[1] > wdim-1)): wl = work_load[j] ngroups[j] = (Si+wl-1)//wl else: continue - j+=1 - assert j==wdim, '{} != {}'.format(j, wdim) + j += 1 + assert j == wdim, '{} != {}'.format(j, wdim) global_size = ngroups * local_work_size return global_size @@ -134,8 +135,8 @@ class TransposeKernelGenerator(KernelCodeGenerator): """ Return a tuple of required (static,dynamic,total) cache bytes per workgroup """ - dtype = self.dtype - nbytes = np.dtype(dtype).itemsize + dtype = self.dtype + nbytes = np.dtype(dtype).itemsize tile_shape = self.tile_shape tile_bytes = prod(tile_shape) * nbytes @@ -151,30 +152,30 @@ class TransposeKernelGenerator(KernelCodeGenerator): dc = 0 tc = sc+dc - return (sc,dc,tc) + return (sc, dc, tc) def __init__(self, typegen, ctype, vectorization, - axes, tile_size, tile_padding, symbolic_mode, - use_diagonal_coordinates = True, - is_inplace = False, - known_vars = None, - debug_mode = False, - tuning_mode = False, - **kargs): + axes, tile_size, tile_padding, symbolic_mode, + use_diagonal_coordinates=True, + is_inplace=False, + known_vars=None, + debug_mode=False, + tuning_mode=False, + **kargs): axes = np.asarray(axes) pdim = axes.size Pdim = upper_pow2_or_3(pdim) assert pdim <= 16, 'Maximal permutation dimension is 16.' - assert Pdim in [1,2,3,4,8,16] - assert vectorization in [1,2,4,8,16] + assert Pdim in [1, 2, 3, 4, 8, 16] + assert vectorization in [1, 2, 4, 8, 16] assert tile_padding >= 0 # check permutation axes - msg='Invalid permutation {} for dimension {}.' - msg=msg.format(axes, pdim) + msg = 'Invalid permutation {} for dimension {}.' + msg = msg.format(axes, pdim) assert axes.size == pdim, msg - assert (axes<pdim).all(), msg + assert (axes < pdim).all(), msg _axes = set(axes.tolist()) if len(_axes) != pdim: raise ValueError(msg) @@ -182,8 +183,8 @@ class TransposeKernelGenerator(KernelCodeGenerator): _permutation = (axes != set(range(pdim))) _naxes = sum(_permutation) if (_naxes == 0): - msg='There is nothing to transpose with given axes {}.' - msg=msg.format(axes) + msg = 'There is nothing to transpose with given axes {}.' + msg = msg.format(axes) raise ValueError(msg) assert _naxes >= 2, msg @@ -196,49 +197,49 @@ class TransposeKernelGenerator(KernelCodeGenerator): tile_indexes = (pdim-1, axes[-1]) else: tile_indexes = (pdim-1,) - tile_axes = tuple( pdim-1-axes[idx] for idx in tile_indexes ) - tile_indexes = tuple( pdim-1-idx for idx in tile_indexes ) + tile_axes = tuple(pdim-1-axes[idx] for idx in tile_indexes) + tile_indexes = tuple(pdim-1-idx for idx in tile_indexes) - permutation_axes = tuple( pdim-1-i for i,idx in enumerate(axes) if i!= idx ) + permutation_axes = tuple(pdim-1-i for i, idx in enumerate(axes) if i != idx) tdim = len(tile_indexes) Tdim = upper_pow2_or_3(tdim) - tile_shape = [tile_size,]*tdim + tile_shape = [tile_size, ]*tdim tile_shape[-1] += tile_padding tile_shape = tuple(tile_shape) - is_tile_index = tuple( (i in tile_indexes) for i in range(pdim) ) - tile_index_to_id = dict( (j,i) for (i,j) in enumerate(tile_indexes) ) + is_tile_index = tuple((i in tile_indexes) for i in range(pdim)) + tile_index_to_id = dict((j, i) for (i, j) in enumerate(tile_indexes)) device = typegen.device if (device.max_work_item_dimensions < tdim): - msg='OpenCL device {} does not support {} working dimensions required ' - msg+='to transpose whith axes {}.' - msg=msg.format(device.name, tdim, axes) + msg = 'OpenCL device {} does not support {} working dimensions required ' + msg += 'to transpose whith axes {}.' + msg = msg.format(device.name, tdim, axes) work_dim = min(pdim, device.max_work_item_dimensions) - workload_indexes = tuple ( i for i in range(pdim) - if (not is_tile_index[i]) and - (i < (work_dim- int(contiguous_permutation and (tile_indexes[1]>work_dim-1))))) - is_workload_index = tuple( (i in workload_indexes) for i in range(pdim) ) - wl_index_to_id = dict( (j,i) for (i,j) in enumerate(workload_indexes) ) + workload_indexes = tuple(i for i in range(pdim) + if (not is_tile_index[i]) and + (i < (work_dim - int(contiguous_permutation and (tile_indexes[1] > work_dim-1))))) + is_workload_index = tuple((i in workload_indexes) for i in range(pdim)) + wl_index_to_id = dict((j, i) for (i, j) in enumerate(workload_indexes)) wldim = len(workload_indexes) WLdim = upper_pow2_or_3(wldim) name = TransposeKernelGenerator.codegen_name(is_inplace, axes, ctype, - tile_size, tile_padding, vectorization, use_diagonal_coordinates) + tile_size, tile_padding, vectorization, use_diagonal_coordinates) kernel_args = self.gen_kernel_arguments(typegen, ctype, Pdim, debug_mode, is_inplace, - known_vars, symbolic_mode) + known_vars, symbolic_mode) - super(self.__class__,self).__init__( - name=name, - typegen=typegen, - work_dim=work_dim, - known_vars = known_vars, - kernel_args = kernel_args, - symbolic_mode=symbolic_mode, - **kargs) + super(TransposeKernelGenerator, self).__init__( + name=name, + typegen=typegen, + work_dim=work_dim, + known_vars=known_vars, + kernel_args=kernel_args, + symbolic_mode=symbolic_mode, + **kargs) dtype = ctype_to_dtype(ctype) @@ -256,39 +257,40 @@ class TransposeKernelGenerator(KernelCodeGenerator): print(' *is_tile_index: {}'.format(is_tile_index)) print(' *is_workload_index: {}'.format(is_workload_index)) print(' *work_dim: {} (tile[{}] + device_workload[{}])'.format(work_dim, - tdim, work_dim-tdim)) + tdim, work_dim-tdim)) print(' *ctype: {}'.format(ctype)) print(' *dtype: {}'.format(dtype)) - self.ctype = ctype - self.dtype = dtype - self.axes = axes - self.pdim = pdim - self.Pdim = Pdim - self.tdim = tdim - self.Tdim = Tdim - self.wldim = wldim - self.WLdim = WLdim - self.tile_size = tile_size - self.tile_shape = tile_shape - self.tile_padding = tile_padding - self.tile_indexes = tile_indexes - self.tile_axes = tile_axes - self.tile_index_to_id = tile_index_to_id - self.workload_indexes = workload_indexes - self.wl_index_to_id = wl_index_to_id - self.is_tile_index = is_tile_index + self.ctype = ctype + self.dtype = dtype + self.axes = axes + self.pdim = pdim + self.Pdim = Pdim + self.tdim = tdim + self.Tdim = Tdim + self.wldim = wldim + self.WLdim = WLdim + self.tile_size = tile_size + self.tile_shape = tile_shape + self.tile_padding = tile_padding + self.tile_indexes = tile_indexes + self.tile_axes = tile_axes + self.tile_index_to_id = tile_index_to_id + self.workload_indexes = workload_indexes + self.wl_index_to_id = wl_index_to_id + self.is_tile_index = is_tile_index self.is_workload_index = is_workload_index - self.is_inplace = is_inplace - self.permutation_axes = permutation_axes - self.vectorization = vectorization + self.is_inplace = is_inplace + self.permutation_axes = permutation_axes + self.vectorization = vectorization self.contiguous_permutation = contiguous_permutation self.use_diagonal_coordinates = use_diagonal_coordinates self.gencode() - def cache_alloc_bytes(self,local_size): + def cache_alloc_bytes(self, local_size): pass + def required_cache_size(self): return self.tile_size @@ -296,125 +298,123 @@ class TransposeKernelGenerator(KernelCodeGenerator): reqs = WriteOnceDict() return reqs - def gen_kernel_arguments(self, typegen, ctype, Pdim, debug_mode, is_inplace, - known_vars, symbolic_mode): + known_vars, symbolic_mode): _global = OpenClCodeGenerator.default_keywords['global'] tg = typegen mesh_dim = Pdim - kargs = ArgDict() + kargs = ArgDict() if is_inplace: data, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='inout', - known_vars=known_vars, symbolic_mode=symbolic_mode, - storage=self._global, ctype=ctype, typegen=typegen, - mesh_dim=mesh_dim, const=False, ptr_restrict=True) + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ctype, typegen=typegen, + mesh_dim=mesh_dim, const=False, ptr_restrict=True) self.inout_strides = strides self.inout_data = data else: in_data, in_strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='in', - known_vars=known_vars, symbolic_mode=symbolic_mode, - storage=self._global, ctype=ctype, typegen=typegen, - mesh_dim=mesh_dim, const=True, ptr_restrict=True) + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ctype, typegen=typegen, + mesh_dim=mesh_dim, const=True, ptr_restrict=True) out_data, out_strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='out', - known_vars=known_vars, symbolic_mode=symbolic_mode, - storage=self._global, ctype=ctype, typegen=typegen, - mesh_dim=mesh_dim, const=False, ptr_restrict=True) - self.in_data = in_data - self.out_data = out_data - self.in_strides = in_strides + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ctype, typegen=typegen, + mesh_dim=mesh_dim, const=False, ptr_restrict=True) + self.in_data = in_data + self.out_data = out_data + self.in_strides = in_strides self.out_strides = out_strides if debug_mode: n_dbg_arrays = self.n_dbg_arrays for i in range(n_dbg_arrays): - kargs['dbg{}'.format(i)] = CodegenVariable(ctype='int', name='dbg{}'.format(i), - typegen=tg, storage=_global, ptr=True, - ptr_const=True, ptr_restrict=True, nl=True) + kargs['dbg{}'.format(i)] = CodegenVariable(ctype='int', name='dbg{}'.format(i), + typegen=tg, storage=_global, ptr=True, + ptr_const=True, ptr_restrict=True, nl=True) self.debug_mode = debug_mode kargs['shape'] = CodegenVectorClBuiltin(btype='int', dim=Pdim, - name='shape', typegen=tg, add_impl_const=True, symbolic_mode=True) + name='shape', typegen=tg, add_impl_const=True, symbolic_mode=True) return kargs - def gencode(self): kernel_reqs = self.build_requirements() self.update_requirements(kernel_reqs) - _local = OpenClCodeGenerator.default_keywords['local'] + _local = OpenClCodeGenerator.default_keywords['local'] - s = self - tg = s.typegen - work_dim = s.work_dim - symbolic_mode = s.symbolic_mode + s = self + tg = s.typegen + work_dim = s.work_dim + symbolic_mode = s.symbolic_mode - ctype = s.ctype - dtype = s.dtype + ctype = s.ctype + dtype = s.dtype - pdim = s.pdim - Pdim = s.Pdim - tdim = s.tdim - Tdim = s.Tdim - wldim = s.wldim - WLdim = s.WLdim + pdim = s.pdim + Pdim = s.Pdim + tdim = s.tdim + Tdim = s.Tdim + wldim = s.wldim + WLdim = s.WLdim - axes = s.axes + axes = s.axes permutation_axes = s.permutation_axes - tile_size = s.tile_size - tile_shape = s.tile_shape - tile_padding = s.tile_padding - tile_indexes = s.tile_indexes - tile_axes = s.tile_axes + tile_size = s.tile_size + tile_shape = s.tile_shape + tile_padding = s.tile_padding + tile_indexes = s.tile_indexes + tile_axes = s.tile_axes tile_index_to_id = s.tile_index_to_id workload_indexes = s.workload_indexes - wl_index_to_id = s.wl_index_to_id + wl_index_to_id = s.wl_index_to_id - is_tile_index = s.is_tile_index + is_tile_index = s.is_tile_index is_workload_index = s.is_workload_index - is_inplace = s.is_inplace - vectorization = s.vectorization + is_inplace = s.is_inplace + vectorization = s.vectorization - contiguous_permutation = s.contiguous_permutation + contiguous_permutation = s.contiguous_permutation use_diagonal_coordinates = s.use_diagonal_coordinates - debug_mode = s.debug_mode + debug_mode = s.debug_mode n_dbg_arrays = s.n_dbg_arrays - local_id = s.vars['local_id'] - local_size = s.vars['local_size'] - group_id = s.vars['group_id'] - group_size = s.vars['num_groups'] + local_id = s.vars['local_id'] + local_size = s.vars['local_size'] + group_id = s.vars['group_id'] + group_size = s.vars['num_groups'] global_size = s.vars['global_size'] - global_id = s.vars['global_id'] + global_id = s.vars['global_id'] S = s.vars['shape'] if is_inplace: - _in = self.inout_data - _out = self.inout_data - _in_strides = self.inout_strides + _in = self.inout_data + _out = self.inout_data + _in_strides = self.inout_strides _out_strides = self.inout_strides else: - _in = self.in_data - _out = self.out_data - _in_strides = self.in_strides + _in = self.in_data + _out = self.out_data + _in_strides = self.in_strides _out_strides = self.out_strides if debug_mode: - dbg = [ s.vars['dbg{}'.format(i)] for i in range(n_dbg_arrays) ] + dbg = [s.vars['dbg{}'.format(i)] for i in range(n_dbg_arrays)] if is_inplace and S.known(): - msg='Permutated shape axis should form an hypercube for inplace transpositions.' - Sp = S.value[np.asarray(permutation_axes)] + msg = 'Permutated shape axis should form an hypercube for inplace transpositions.' + Sp = S.value[np.asarray(permutation_axes)] Sp0 = S.value[permutation_axes[0]] - assert (Sp==Sp0).all(), msg + assert (Sp == Sp0).all(), msg tile_size = CodegenVariable(typegen=tg, name='tile_size', ctype='int', - const=True, value=self.tile_size, symbolic_mode=symbolic_mode) + const=True, value=self.tile_size, symbolic_mode=symbolic_mode) tile_padding = CodegenVariable(typegen=tg, name='tile_padding', ctype='int', - const=True, value=self.tile_padding, symbolic_mode=symbolic_mode) + const=True, value=self.tile_padding, symbolic_mode=symbolic_mode) tile_sshape = [tile_size.value]*tdim tile_sshape[-1] = '{}+{}'.format(tile_size.value, tile_padding.value) @@ -422,13 +422,13 @@ class TransposeKernelGenerator(KernelCodeGenerator): ntiles = '(({}+{}-1)/{})'.format(S, tile_size, tile_size) ntiles = CodegenVectorClBuiltin('ntiles', 'int', Pdim, tg, const=True, - init = ntiles) + init=ntiles) nwork = '(({}+{}-1)/{})'.format(S[:work_dim], local_size, local_size) nwork = CodegenVectorClBuiltin('nwork', 'int', work_dim, tg, const=True, - init = nwork) + init=nwork) - idx = CodegenVectorClBuiltin('idx', 'int', Pdim, tg) + idx = CodegenVectorClBuiltin('idx', 'int', Pdim, tg) bidx = CodegenVectorClBuiltin('bidx', 'int', Tdim, tg) tidx = CodegenVectorClBuiltin('tidx', 'int', Tdim, tg) lidx = CodegenVectorClBuiltin('lidx', 'int', Tdim, tg) @@ -438,72 +438,72 @@ class TransposeKernelGenerator(KernelCodeGenerator): tile_offset_in, tile_offset_out = '', '' local_offset_in, local_offset_out = '', '' - ki=kj=tdim-1 + ki = kj = tdim-1 for k in range(pdim): i = pdim-1-k j = pdim-1-axes[k] - if i==pdim-1: - tile_offset_in = '{}*{}'.format(_in_strides[i], idx[i]) + if i == pdim-1: + tile_offset_in = '{}*{}'.format(_in_strides[i], idx[i]) tile_offset_out = '{}*{}'.format(_out_strides[i], idx[j]) else: - tile_offset_in += ' $+ {}*{}'.format(_in_strides[i], idx[i]) + tile_offset_in += ' $+ {}*{}'.format(_in_strides[i], idx[i]) tile_offset_out += ' $+ {}*{}'.format(_out_strides[i], idx[j]) if i in tile_indexes: - if ki==tdim-1: + if ki == tdim-1: local_offset_in = '{}*{}'.format(_in_strides[i], lidx[ki]) else: local_offset_in += ' $+ {}*{}'.format(_in_strides[i], lidx[ki]) - ki-=1 + ki -= 1 if j in tile_indexes: - if kj==tdim-1: + if kj == tdim-1: local_offset_out = '{}*{}'.format(_out_strides[i], lidx[kj]) else: local_offset_out += ' $+ {}*{}'.format(_out_strides[i], lidx[kj]) kj -= 1 - assert ki==-1 - assert kj==-1 + assert ki == -1 + assert kj == -1 tile_id = '' block_id = '' loc_id = '' - for i in range(tdim-1,-1,-1): + for i in range(tdim-1, -1, -1): if i == tdim-1: - tile_id = '{}'.format(tidx[i]) + tile_id = '{}'.format(tidx[i]) block_id = '{}'.format(bidx[i]) else: - tile_id = '({}*{}+{})'.format(tile_id, ntiles[i], tidx[i]) + tile_id = '({}*{}+{})'.format(tile_id, ntiles[i], tidx[i]) block_id = '({}*{}+{})'.format(block_id, ntiles[i], bidx[i]) - for i in range(work_dim-1,-1,-1): + for i in range(work_dim-1, -1, -1): if i == work_dim-1: loc_id = '{}'.format(local_id[i]) else: loc_id = '({}*{}+{})'.format(loc_id, local_size[i], local_id[i]) - tile_offset_in = CodegenVariable('tile_offset_in', 'ulong', tg, - init=tile_offset_in, const=True) + tile_offset_in = CodegenVariable('tile_offset_in', 'ulong', tg, + init=tile_offset_in, const=True) tile_offset_out = CodegenVariable('tile_offset_out', 'ulong', tg, - init=tile_offset_out, const=True) + init=tile_offset_out, const=True) local_offset_in = CodegenVariable('local_offset_in', 'ulong', tg, - init=local_offset_in, const=True) + init=local_offset_in, const=True) local_offset_out = CodegenVariable('local_offset_out', 'ulong', tg, - init=local_offset_out, const=True) + init=local_offset_out, const=True) TID = CodegenVariable('TID', 'int', tg, const=True, - init=tile_id) + init=tile_id) BID = CodegenVariable('BID', 'int', tg, const=True, - init=block_id) + init=block_id) LID = CodegenVariable('LID', 'int', tg, const=True, - init=loc_id) + init=loc_id) active = CodegenVariable('active', 'bool', tg) active_cond = ' && '.join(['({}<{})'.format(idx[i], S[i]) for i in range(pdim) - if (i < (work_dim- int(contiguous_permutation and (tile_indexes[1]>work_dim-1)))) - and (i not in tile_indexes)]) + if (i < (work_dim - int(contiguous_permutation and (tile_indexes[1] > work_dim-1)))) + and (i not in tile_indexes)]) @contextmanager def _block_iterate_(i): @@ -512,14 +512,14 @@ class TransposeKernelGenerator(KernelCodeGenerator): tid = tile_index_to_id[i] imin = min(i, work_dim-1) loop = '{i}={ig}; {i}<{N}; {i}+={ng}'.format(i=bidx[tid], - ig=group_id[imin], N=ntiles[i], - ng=group_size[imin]) + ig=group_id[imin], N=ntiles[i], + ng=group_size[imin]) unroll = True elif is_workload_index[i]: wid = wl_index_to_id[i] loop = '{i}={ig}; {i}<{N}; {i}+={ng}'.format(i=kidx[wid], - ig=group_id[i], ng=group_size[i], - N=nwork[i]) + ig=group_id[i], ng=group_size[i], + N=nwork[i]) unroll = True else: loop = '{i}=0; {i}<{N}; {i}+=1'.format(i=idx[i], N=S[i]) @@ -533,54 +533,53 @@ class TransposeKernelGenerator(KernelCodeGenerator): def _tile_iterate(i, tile_idx): try: loop = '{var}={lid}; ({var}<{N}) && ({glob}+{var} < {S}); {var}+={L}'.format(i=i, - var=lidx[i], - glob=idx[tile_idx], - lid=local_id[i], - L=local_size[i], - N=tile_size, - S=S[tile_idx]) + var=lidx[i], + glob=idx[tile_idx], + lid=local_id[i], + L=local_size[i], + N=tile_size, + S=S[tile_idx]) unroll = True with s._for_(loop, unroll=unroll) as ctx: yield ctx except: raise - block_loops = [ _block_iterate_(i) for i in range(pdim) ][::-1] + block_loops = [_block_iterate_(i) for i in range(pdim)][::-1] if is_inplace and contiguous_permutation: tile0 = CodegenArray(typegen=tg, name='tile0', ctype=ctype, storage='__local', - dim=tdim, shape=tile_sshape) + dim=tdim, shape=tile_sshape) tile1 = CodegenArray(typegen=tg, name='tile1', ctype=ctype, storage='__local', - dim=tdim, shape=tile_sshape) + dim=tdim, shape=tile_sshape) tiles = (tile0, tile1) - tile_loops_in0 = [ _tile_iterate(i,j) for i,j in enumerate(tile_indexes) ][::-1] - tile_loops_out0 = [ _tile_iterate(i,j) for i,j in enumerate(tile_indexes[::-1]) ][::-1] - tile_loops_in1 = [ _tile_iterate(i,j) for i,j in enumerate(tile_indexes[::-1]) ][::-1] - tile_loops_out1 = [ _tile_iterate(i,j) for i,j in enumerate(tile_indexes) ][::-1] + tile_loops_in0 = [_tile_iterate(i, j) for i, j in enumerate(tile_indexes)][::-1] + tile_loops_out0 = [_tile_iterate(i, j) for i, j in enumerate(tile_indexes[::-1])][::-1] + tile_loops_in1 = [_tile_iterate(i, j) for i, j in enumerate(tile_indexes[::-1])][::-1] + tile_loops_out1 = [_tile_iterate(i, j) for i, j in enumerate(tile_indexes)][::-1] - tile_in0 = tile0() + ''.join(['[{}]'.format(lidx[i]) - for i in range(tdim)][::-1]) + tile_in0 = tile0() + ''.join(['[{}]'.format(lidx[i]) + for i in range(tdim)][::-1]) tile_out0 = tile0() + ''.join(['[{}]'.format(lidx[axes.tolist().index(axes[i])]) - for i in range(tdim)]) + for i in range(tdim)]) tile_in1 = tile1() + ''.join(['[{}]'.format(lidx[axes.tolist().index(axes[i])]) - for i in range(tdim)]) - tile_out1 = tile1() + ''.join(['[{}]'.format(lidx[i]) - for i in range(tdim)][::-1]) + for i in range(tdim)]) + tile_out1 = tile1() + ''.join(['[{}]'.format(lidx[i]) + for i in range(tdim)][::-1]) else: tile = CodegenArray(typegen=tg, name='tile', ctype=ctype, storage='__local', - dim=tdim, shape=tile_sshape) + dim=tdim, shape=tile_sshape) tiles = (tile,) - tile_loops_in0 = [ _tile_iterate(i,j) for i,j in enumerate(tile_indexes) ][::-1] - tile_loops_out0 = [ _tile_iterate(i,j) for i,j in enumerate(tile_indexes[::-1]) ][::-1] + tile_loops_in0 = [_tile_iterate(i, j) for i, j in enumerate(tile_indexes)][::-1] + tile_loops_out0 = [_tile_iterate(i, j) for i, j in enumerate(tile_indexes[::-1])][::-1] - tile_in0 = tile() + ''.join(['[{}]'.format(lidx[i]) - for i in range(tdim)][::-1]) + tile_in0 = tile() + ''.join(['[{}]'.format(lidx[i]) + for i in range(tdim)][::-1]) tile_out0 = tile() + ''.join(['[{}]'.format(lidx[axes.tolist().index(axes[i])]) - for i in range(tdim)]) - + for i in range(tdim)]) - #include complex definitions if required + # include complex definitions if required with s._codeblock_('pragma_extensions'): if (ctype == 'cdouble_t'): s.define('PYOPENCL_DEFINE_CDOUBLE') @@ -589,15 +588,15 @@ class TransposeKernelGenerator(KernelCodeGenerator): with s._kernel_(): with s._align_() as al: - tile_size.declare(al,align=True) - tile_padding.declare(al,align=True) + tile_size.declare(al, align=True) + tile_padding.declare(al, align=True) s.jumpline() s.decl_aligned_vars(global_id, local_id, group_id, global_size, local_size, group_size, const=True) ptrs = (_in,) if not is_inplace: - ptrs+=(_out,) + ptrs += (_out,) s.decl_aligned_vars(*ptrs) s.jumpline() nwork.declare(s) @@ -609,7 +608,7 @@ class TransposeKernelGenerator(KernelCodeGenerator): s.jumpline() comment = \ -'''Iteration over all active and non active axes. + '''Iteration over all active and non active axes. Inactive axes are the ones that are not permutated. Active axes contain tiles that should be permutated. Each workgroup handle a certain amount of tiles on those active axes, @@ -620,7 +619,7 @@ The same workgroup writes than back the permutated tile into the output array. The two first block coordinates of a tile can be mapped to diagonal coordinates to prevent memory camping that may occur during global input read or output write (tiles are not wide enough in the contiguous direction).''' - #s.comment(comment) + # s.comment(comment) s.decl_vars(idx) s.decl_vars(bidx, tidx) if wldim: @@ -647,9 +646,9 @@ to prevent memory camping that may occur during global input read or output writ if wldim: s.comment('Adjust global offset index using the workload index') code = '{} = {}*{}+{};'.format(idx[workload_indexes], - kidx[:wldim], - local_size[workload_indexes], - local_id[workload_indexes]) + kidx[:wldim], + local_size[workload_indexes], + local_id[workload_indexes]) s.append(code) s.jumpline() @@ -660,9 +659,9 @@ to prevent memory camping that may occur during global input read or output writ s.comment('Determine if this index is active') if is_inplace: - assert len(permutation_axes)==2 + assert len(permutation_axes) == 2 acond = '({}<={})'.format(idx[permutation_axes[-2]], - idx[permutation_axes[-1]]) + idx[permutation_axes[-1]]) if active_cond: active_cond += ' && {}'.format(acond) else: @@ -679,12 +678,12 @@ to prevent memory camping that may occur during global input read or output writ with nested(*tile_loops_in0): local_offset_in.declare(s) s.append('{} = {};'.format(tile_in0, _in['{}+{}'.format(tile_offset_in, - local_offset_in)])) + local_offset_in)])) if is_inplace: with nested(*tile_loops_in1): local_offset_out.declare(s) s.append('{} = {};'.format(tile_in1, _out['{}+{}'.format(tile_offset_out, - local_offset_out)])) + local_offset_out)])) s.barrier(_local=True) s.jumpline() @@ -693,20 +692,20 @@ to prevent memory camping that may occur during global input read or output writ with nested(*tile_loops_out0): local_offset_out.declare(s) s.append('{} = {};'.format(_out['{}+{}'.format(tile_offset_out, - local_offset_out)], tile_out0)) + local_offset_out)], tile_out0)) if is_inplace: with nested(*tile_loops_out1): local_offset_in.declare(s) s.append('{} = {};'.format(_in['{}+{}'.format(tile_offset_in, - local_offset_in)], tile_out1)) + local_offset_in)], tile_out1)) s.barrier(_local=True) else: with s._if_(active): with tile_loops_in0[0]: if is_inplace: - offset_in='{}$+{}'.format(tile_offset_in, lidx[0]) - offset_out='{}$+{}'.format(tile_offset_out, lidx[0]) + offset_in = '{}$+{}'.format(tile_offset_in, lidx[0]) + offset_out = '{}$+{}'.format(tile_offset_out, lidx[0]) tmp_load = self.vload(vectorization, _inout, offset_out) tmp.affect(s, init=tmp_load) @@ -719,8 +718,8 @@ to prevent memory camping that may occur during global input read or output writ s.append(tmp_store) else: code = '{} = {};'.format( - _out['{}+{}'.format(tile_offset_out, lidx[0])], - _in['{}+{}'.format(tile_offset_in, lidx[0])]) + _out['{}+{}'.format(tile_offset_out, lidx[0])], + _in['{}+{}'.format(tile_offset_in, lidx[0])]) s.append(code) @@ -728,14 +727,14 @@ if __name__ == '__main__': from hysop.backend.device.codegen.base.test import _test_typegen tg = _test_typegen('float') ek = TransposeKernelGenerator(typegen=tg, - ctype='short', - vectorization=4, - axes=(2,1,0,4,3), - tile_size=8, tile_padding=1, - is_inplace=False, - symbolic_mode=True, - known_vars={ - 'shape': (256,128,64,32,16,0,0,0) - }) + ctype='short', + vectorization=4, + axes=(2, 1, 0, 4, 3), + tile_size=8, tile_padding=1, + is_inplace=False, + symbolic_mode=True, + known_vars={ + 'shape': (256, 128, 64, 32, 16, 0, 0, 0) + }) ek.edit() ek.test_compile() diff --git a/hysop/backend/device/codegen/symbolic/cast.py b/hysop/backend/device/codegen/symbolic/cast.py index ff9f19f47..4a68a91bd 100644 --- a/hysop/backend/device/codegen/symbolic/cast.py +++ b/hysop/backend/device/codegen/symbolic/cast.py @@ -7,28 +7,29 @@ from hysop.backend.device.opencl.opencl_types import vtype, basetype, components from hysop.backend.device.codegen.symbolic.expr import OpenClConvert, OpenClCast, OpenClBool, TypedI from hysop.backend.device.codegen.symbolic.misc import OpenClBroadCast, OpenClExpand + class OpenClCastUtils(object): - ## SCALAR RANKS + # SCALAR RANKS # scalar ranks defined as Usual Arithmetic Conversions # http://www.informit.com/articles/article.aspx?p=1732873&seqNum=6 # and section 6.3.1.8 of the C99 specification. __type_ranks = [npw.float64, npw.float32, npw.float16, - npw.uint64, npw.int64, - npw.uint32, npw.int32, - npw.uint16, npw.int16, - npw.uint8, npw.int8, - npw.bool_] + npw.uint64, npw.int64, + npw.uint32, npw.int32, + npw.uint16, npw.int16, + npw.uint8, npw.int8, + npw.bool_] type_ranks = dict(zip(__type_ranks, range(len(__type_ranks)-1, -1, -1))) - ## OPENCL IMPLICIT CONVERSION RULES + # OPENCL IMPLICIT CONVERSION RULES # scalar op scalar => scalar promotion to higher rank type, nothing to do # scalar op vector => vector promotion if rank[stype] <= rank[btype] where btype is the vector base type, else compile time error # vector op vector => valid only if vtype0 == vtype1, else compile time error @classmethod def promote_expressions_to_required_rank(cls, exprs, broadcast_args=False): - assert len(exprs)>0 + assert len(exprs) > 0 vector_types, vector_ranks, vector_components = (), (), () max_scalar_rank, max_scalar_type = None, None @@ -38,12 +39,12 @@ class OpenClCastUtils(object): ebase = expr.btype dtype = ctype_to_dtype(ebase) rank = cls.type_ranks[dtype] - if (ecomponents>1): + if (ecomponents > 1): vector_types += (ebase,) vector_ranks += (rank,) vector_components += (ecomponents,) else: - if (max_scalar_rank is None) or (rank>max_scalar_rank): + if (max_scalar_rank is None) or (rank > max_scalar_rank): max_scalar_rank = rank max_scalar_type = ebase @@ -53,16 +54,16 @@ class OpenClCastUtils(object): return exprs, max_scalar_type, max_scalar_type vcomponents = npw.asarray(vector_components) - vcomponents = vcomponents[vcomponents>1] - if (not broadcast_args) and (vcomponents.size>0): + vcomponents = vcomponents[vcomponents > 1] + if (not broadcast_args) and (vcomponents.size > 0): if (vcomponents != vcomponents[0]).all(): - msg='Vector size mismtach in expressions:' + msg = 'Vector size mismtach in expressions:' for e in exprs: - msg+='\n *{}: {}'.format(e.ctype, str(e)) - msg+='\n' + msg += '\n *{}: {}'.format(e.ctype, str(e)) + msg += '\n' raise RuntimeError(msg) - max_vector_rank = max(vector_ranks) + max_vector_rank = max(vector_ranks) max_vector_components = max(vector_components) vector_btype = vector_types[vector_ranks.index(max_vector_rank)] vector_vtype = '{}{}'.format(vector_btype, max_vector_components) @@ -70,21 +71,21 @@ class OpenClCastUtils(object): promoted_exprs = () for expr in exprs: # cast all vectors to max rank and broadcast if required - et = expr.ctype - ebase = expr.btype + et = expr.ctype + ebase = expr.btype ecomponents = expr.components dtype = ctype_to_dtype(ebase) - rank = cls.type_ranks[dtype] - if (ecomponents>1): + rank = cls.type_ranks[dtype] + if (ecomponents > 1): assert (max_vector_components % ecomponents == 0) broadcast_factor = (max_vector_components // ecomponents) if (rank != max_vector_rank): expr = OpenClConvert(et, expr) - if (broadcast_factor>1): + if (broadcast_factor > 1): expr = OpenClBroadCast(None, expr, broadcast_factor) else: if (rank > max_vector_rank): - et = vector_btype + et = vector_btype expr = OpenClCast(et, expr) promoted_exprs += (expr,) @@ -92,7 +93,7 @@ class OpenClCastUtils(object): @classmethod def promote_expressions_to_float(cls, exprs): - dtypes = tuple( ctype_to_dtype(e.btype) for e in exprs ) + dtypes = tuple(ctype_to_dtype(e.btype) for e in exprs) common_dtype = find_common_dtype(*dtypes) float_dtype = match_float_type(common_dtype) if float_dtype == npw.float16: @@ -102,10 +103,9 @@ class OpenClCastUtils(object): fbtype = dtype_to_ctype(float_dtype) promoted = tuple(cls.promote_basetype_to(e, fbtype) - for e in exprs) + for e in exprs) return promoted, fbtype - @classmethod def promote_to(cls, expr, target_type): if (expr.ctype == target_type): @@ -115,9 +115,9 @@ class OpenClCastUtils(object): tbase, tcomponents = basetype(target_type), components(target_type) if (ecomponents != tcomponents): - if (ecomponents>1): - msg='Components mismatch {} vs {}.' - msg=msg.format(ecomponents, tcomponents) + if (ecomponents > 1): + msg = 'Components mismatch {} vs {}.' + msg = msg.format(ecomponents, tcomponents) raise RuntimeError(msg) return OpenClCast(target_type, expr) return OpenClConvert(target_type, expr) @@ -132,28 +132,28 @@ class OpenClCastUtils(object): @classmethod def promote_expressions_to_required_signature(cls, exprs, signature, ret, - expand=None): + expand=None): exprs = to_tuple(exprs) signature = to_tuple(signature) expand = to_tuple(first_not_None(expand, (False,)*len(exprs))) assert len(exprs) == len(signature) == len(expand) > 0 check_instance(expand, tuple, values=bool, allow_none=False) check_instance(exprs, tuple, values=TypedI, allow_none=False) - check_instance(signature, tuple, values=(type(None),str), allow_none=False) - check_instance(ret, (str,int), allow_none=True) + check_instance(signature, tuple, values=(type(None), str), allow_none=False) + check_instance(ret, (str, int), allow_none=True) - dtypes = tuple( e.dtype for e in exprs ) - components = tuple( e.components for e in exprs ) + dtypes = tuple(e.dtype for e in exprs) + components = tuple(e.components for e in exprs) common_dtype = find_common_dtype(*dtypes) n = max(components) _exprs = () - for (e,s,exp) in zip(exprs, signature, expand): + for (e, s, exp) in zip(exprs, signature, expand): ecomponents = e.components is_bool = (s == 'btype') if is_bool: - s='itype' + s = 'itype' if (s is None): dtype = common_dtype elif isinstance(s, str): @@ -165,18 +165,18 @@ class OpenClCastUtils(object): ctype = e.vtype(btype, ecomponents) vtype = e.vtype(btype, n) - if (ecomponents>1): + if (ecomponents > 1): assert (n % ecomponents == 0) broadcast_factor = (n // ecomponents) if (ctype != e.ctype): e = OpenClConvert(ctype, e) - if (broadcast_factor>1): + if (broadcast_factor > 1): if exp: e = OpenClExpand(None, e, broadcast_factor) else: e = OpenClBroadCast(None, e, broadcast_factor) else: - if is_bool: # set all bits to 1 + if is_bool: # set all bits to 1 e = OpenClBool(e) if (vtype != e.ctype): e = OpenClCast(vtype, e) @@ -185,41 +185,25 @@ class OpenClCastUtils(object): exprs = _exprs if (ret is None): - ctype='void' + ctype = 'void' elif isinstance(ret, int): assert ret < len(exprs) ctype = exprs[ret].ctype elif isinstance(ret, str): - if (ret=='complex2real'): - assert n>=2 and n%2==0 + if (ret == 'complex2real'): + assert n >= 2 and n % 2 == 0 ret_dtype = demote_dtype(common_dtype, 'f') ctype = e.vtype(dtype_to_ctype(ret_dtype), n//2) - elif (ret=='real2complex'): + elif (ret == 'real2complex'): ret_dtype = demote_dtype(common_dtype, 'f') ctype = e.vtype(dtype_to_ctype(ret_dtype), n*2) else: - if ret=='btype': - ret='itype' + if ret == 'btype': + ret = 'itype' ret_dtype = demote_dtype(common_dtype, ret[0]) ctype = e.vtype(dtype_to_ctype(ret_dtype), n) else: - msg='Unknown return dtype {}.'.format(ret) + msg = 'Unknown return dtype {}.'.format(ret) raise NotImplementedError(msg) return exprs, ctype - - - @classmethod - def promote_expressions_to_float(cls, exprs): - dtypes = tuple( ctype_to_dtype(e.btype) for e in exprs ) - common_dtype = find_common_dtype(*dtypes) - float_dtype = match_float_type(common_dtype) - if float_dtype == npw.float16: - float_dtype = npw.float32 - elif float_dtype == npw.longdouble: - float_dtype = npw.float64 - fbtype = dtype_to_ctype(float_dtype) - - promoted = tuple(cls.promote_basetype_to(e, fbtype) - for e in exprs) - return promoted, fbtype diff --git a/hysop/backend/device/codegen/symbolic/kernels/custom_symbolic_time_integrate.py b/hysop/backend/device/codegen/symbolic/kernels/custom_symbolic_time_integrate.py index 7f17339ac..02d538609 100644 --- a/hysop/backend/device/codegen/symbolic/kernels/custom_symbolic_time_integrate.py +++ b/hysop/backend/device/codegen/symbolic/kernels/custom_symbolic_time_integrate.py @@ -1,4 +1,3 @@ - from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict from hysop.backend.device.codegen.kernels.custom_symbolic import CustomSymbolicKernelGenerator from hysop.backend.device.codegen.symbolic.expr import OpenClPrinter @@ -6,10 +5,12 @@ from hysop.symbolic.field import SymbolicDiscreteField from hysop.symbolic.misc import TimeIntegrate from hysop.backend.device.codegen.symbolic.functions.custom_symbolic_function import CustomSymbolicFunction from hysop.backend.device.codegen.symbolic.expr import OpenClAssignment, OpenClVariable, FunctionCall, UpdateVars, IfElse -from hysop.backend.device.codegen.base.variables import CodegenVariable, \ - CodegenVectorClBuiltin, CodegenArray, ctype_to_dtype +from hysop.backend.device.codegen.base.variables import CodegenVariable, \ + CodegenVectorClBuiltin, CodegenArray, ctype_to_dtype from hysop.backend.device.codegen.functions.vload import Vload from hysop.backend.device.codegen.functions.vstore import Vstore +from hysop.operator.base.custom_symbolic_operator import ValidExpressions + class CustomSymbolicTimeIntegrateKernelGenerator(CustomSymbolicKernelGenerator): @@ -35,10 +36,10 @@ class CustomSymbolicTimeIntegrateKernelGenerator(CustomSymbolicKernelGenerator): _ghosts = {} fcalls = () - shared = set() + shared = set() private = set() vnames = tuple() - for i,expr in enumerate(expr_info.dexprs): + for i, expr in enumerate(expr_info.dexprs): if isinstance(expr, TimeIntegrate): time_integrator, lhs, rhs = expr.args if isinstance(lhs, SymbolicDiscreteField): @@ -58,9 +59,9 @@ class CustomSymbolicTimeIntegrateKernelGenerator(CustomSymbolicKernelGenerator): K_args[vname] = CodegenArray('K_{}'.format(vname), vtype, tg, shape=(time_integrator.stages,)) Uo[vname] = CodegenVectorClBuiltin(vname+'__0', ctype, vectorization, typegen=tg, - const=True, nl=True) + const=True, nl=True) Uk[vname] = CodegenVectorClBuiltin(vname+'__k', ctype, vectorization, typegen=tg, - const=True, nl=True) + const=True, nl=True) _ghosts[vname] = ghosts dval = CustomSymbolicFunction.default_out_of_bounds_value(ctype_to_dtype(ctype)) @@ -68,29 +69,29 @@ class CustomSymbolicTimeIntegrateKernelGenerator(CustomSymbolicKernelGenerator): size = int(csc.array_size(lhs.field, lhs.index)) assert (size > 2*ghosts), (size, 2*ghosts) vload = Vload(tg, ctype, vectorization, default_val=dval, - itype=itype, restrict=True, storage=var.storage, - known_args=dict(size=size)) + itype=itype, restrict=True, storage=var.storage, + known_args=dict(size=size)) if (vload.name not in reqs): reqs[vload.name] = vload vloads[vname] = vload else: vloads[vname] = reqs[vload.name] vstore = Vstore(tg, ctype, vectorization, - itype=itype, restrict=True, storage=var.storage, - known_args=dict(size=size)) + itype=itype, restrict=True, storage=var.storage, + known_args=dict(size=size)) if (vstore.name not in reqs): reqs[vstore.name] = vstore vstores[vname] = vstore else: vstores[vname] = reqs[vstore.name] else: - msg='Unknown lhs type {} for assignment, valid ones are SymbolicDiscreteField.' - msg=msg.format(type(lhs)) + msg = 'Unknown lhs type {} for assignment, valid ones are SymbolicDiscreteField.' + msg = msg.format(type(lhs)) raise NotImplementedError(msg) - fname='f{}'.format(i) + fname = 'f{}'.format(i) rhs_fn = CustomSymbolicFunction(csc=csc, name=fname, expr=rhs, target_ctype=vtype, - inline=(not csc.tuning_mode), known_args=known_vars) + inline=(not csc.tuning_mode), known_args=known_vars) fn_kwds = rhs_fn.args.copy() rhs = FunctionCall(rhs_fn.ctype, rhs_fn, fn_kwds) vnames += (vname,) @@ -98,8 +99,8 @@ class CustomSymbolicTimeIntegrateKernelGenerator(CustomSymbolicKernelGenerator): reqs[fname] = rhs_fn assert (vtype == rhs.ctype), '{} != {}'.format(var.ctype, rhs.ctype) else: - msg='Unknown expression type {}, valid ones are {}.' - msg=msg.format(type(expr), ValidExpressions) + msg = 'Unknown expression type {}, valid ones are {}.' + msg = msg.format(type(expr), ValidExpressions) raise NotImplementedError(msg) self.time_integrator = time_integrator @@ -108,30 +109,29 @@ class CustomSymbolicTimeIntegrateKernelGenerator(CustomSymbolicKernelGenerator): self.K_args = K_args self.Uo = Uo self.Uk = Uk - self.vloads = vloads + self.vloads = vloads self.vstores = vstores self.private = private self.shared = shared self._ghosts = _ghosts return reqs - def generate_expr_code(self): - s = self - tg = s.typegen - csc = s.csc + s = self + tg = s.typegen + csc = s.csc info = csc.expr_info time_integrator = self.time_integrator args = csc.args - K = s.K_args - Uo = s.Uo - Uk = s.Uk + K = s.K_args + Uo = s.Uo + Uk = s.Uk fcalls = s.fcalls vnames = s.vnames - vloads = s.vloads + vloads = s.vloads vstores = s.vstores shared = s.shared private = s.private @@ -148,7 +148,7 @@ class CustomSymbolicTimeIntegrateKernelGenerator(CustomSymbolicKernelGenerator): G = ghosts[vname] if (vname in shared): load = vloads[vname](data=arg, - offset='{}+{}'.format(csc.local_offset,G)) + offset='{}+{}'.format(csc.local_offset, G)) uo.declare(al, align=True, init=load) else: uo.declare(al, align=True, init=arg) @@ -163,11 +163,11 @@ class CustomSymbolicTimeIntegrateKernelGenerator(CustomSymbolicKernelGenerator): if is_last: gamma = time_integrator.beta else: - gamma = time_integrator.gamma[i-1,:] + gamma = time_integrator.gamma[i-1, :] s.comment('Computing Runge-Kutta stage {}'.format(i)) with s._block_(): with s._align_() as al: - for (vname,fcall) in zip(vnames, fcalls): + for (vname, fcall) in zip(vnames, fcalls): k = K[vname] call = printer.doprint(fcall, terminate=False) code = '{} $= {};'.format(k[i-1], call) @@ -179,13 +179,13 @@ class CustomSymbolicTimeIntegrateKernelGenerator(CustomSymbolicKernelGenerator): with s._block_(): with s._align_() as al: for vname in vnames: - k = K[vname] + k = K[vname] uk = Uk[vname].nv_replace('k', str(i)) uo = Uo[vname] - init = ' $+ '.join('{}$*{}'.format(tg.dump(float(gamma[j])),k[j]) for j in range(i) if (gamma[j]!=0)) + init = ' $+ '.join('{}$*{}'.format(tg.dump(float(gamma[j])), k[j]) for j in range(i) if (gamma[j] != 0)) init = '{} $+ {}*{}*({})'.format(uo, - tg.dump(float(self.csc.expr_info.dt_coeff)), - self.args[self.csc.expr_info.dt.name], init) + tg.dump(float(self.csc.expr_info.dt_coeff)), + self.args[self.csc.expr_info.dt.name], init) uk.declare(al, init=init, align=True) s.jumpline() for vname in shared: @@ -193,7 +193,7 @@ class CustomSymbolicTimeIntegrateKernelGenerator(CustomSymbolicKernelGenerator): value = Uk[vname].nv_replace('k', str(i)) G = ghosts[vname] code = vstores[vname](value=value, data=arg, - offset='{}+{}'.format(csc.local_offset,G)) + ';' + offset='{}+{}'.format(csc.local_offset, G)) + ';' s.append(code) if private: with s._align_() as al: diff --git a/hysop/backend/device/logical_device.py b/hysop/backend/device/logical_device.py index 44682869e..7fabdb2ff 100644 --- a/hysop/backend/device/logical_device.py +++ b/hysop/backend/device/logical_device.py @@ -1,10 +1,10 @@ - from abc import ABCMeta, abstractmethod from hysop.tools.enum import EnumFactory from hysop.tools.types import to_tuple from hysop.backend.device.device_info import device_info from hysop.constants import DeviceType + class UnknownDeviceAttribute(object): def __str__(self): return 'unknown' @@ -13,8 +13,8 @@ class UnknownDeviceAttribute(object): class LogicalDevice(object, metaclass=ABCMeta): def __init__(self, platform, platform_handle, device_id, device_handle, - hardware_topo, **kargs): - super(LogicalDevice,self).__init__(**kargs) + hardware_topo, **kargs): + super(LogicalDevice, self).__init__(**kargs) self._platform = platform self._device_id = device_id physical_devices = self._match_physical_devices(hardware_topo=hardware_topo) @@ -74,59 +74,74 @@ class LogicalDevice(object, metaclass=ABCMeta): pass def __str__(self): - return self.to_string() + return self.to_string(indent=0, increment=2) -#DEVICE +# DEVICE @abstractmethod def name(self): pass + @abstractmethod def platform_name(self): pass + @abstractmethod def type(self): pass + @abstractmethod def vendor(self): pass + @abstractmethod def vendor_id(self): pass + @abstractmethod def max_clock_frequency(self): pass + @abstractmethod def address_bits(self): pass + @abstractmethod def little_endian(self): pass + @abstractmethod def available(self): pass + @abstractmethod def compiler_available(self): pass + @abstractmethod def error_correction_support(self): pass -#KERNEL +# KERNEL @abstractmethod def max_grid_dim(self): pass + @abstractmethod def max_grid_size(self): pass + @abstractmethod def max_block_dim(self): pass + @abstractmethod def max_block_size(self): pass + @abstractmethod def max_threads_per_block(self): pass + @abstractmethod def simd_lane_size(self): pass @@ -135,19 +150,23 @@ class LogicalDevice(object, metaclass=ABCMeta): def max_constant_args(self): pass -#MEMORY +# MEMORY @abstractmethod def global_mem_size(self): pass + @abstractmethod def global_mem_cache_size(self): pass + @abstractmethod def global_mem_cacheline_size(self): pass + @abstractmethod def global_mem_cache_type(self): pass + @abstractmethod def max_global_alloc_size(self): pass @@ -155,15 +174,17 @@ class LogicalDevice(object, metaclass=ABCMeta): @abstractmethod def local_mem_size(self): pass + @abstractmethod def local_mem_type(self): pass -#DEVICE SPLITTING +# DEVICE SPLITTING @abstractmethod def has_device_partition_support(self): pass + @abstractmethod def max_subdevices(self): pass @@ -173,96 +194,118 @@ class LogicalDevice(object, metaclass=ABCMeta): def has_queue_priority_support(self): pass -#FP SUPPORT +# FP SUPPORT @abstractmethod def has_fp16(self): pass + @abstractmethod def has_fp32(self): pass + @abstractmethod def has_fp64(self): pass + @abstractmethod def fp16_config(self): pass + @abstractmethod def fp32_config(self): pass + @abstractmethod def fp64_config(self): pass -#IMAGES +# IMAGES def has_image_support(self): pass + def max_image_args(self): pass + def max_read_image_args(self): pass + def max_write_image_args(self): pass + def max_samplers(self): pass def has_1d_image_support(self): pass + def has_2d_image_support(self): pass + def has_3d_image_support(self): pass def has_1d_image_write_support(self): pass + def has_2d_image_write_support(self): pass + def has_3d_image_write_support(self): pass def has_1d_image_array_support(self): pass + def has_2d_array_image_support(self): pass def max_1d_image_size(self): pass + def max_1d_image_array_size(self): pass def max_2d_image_size(self): pass + def max_2d_image_array_size(self): pass def max_3d_image_size(self): pass - def has_2d_image_from_buffer_support(self): pass + def has_2d_image_from_image_support(self): pass def image_base_address_alignment(self): pass + def image_pitch_aligment(self): pass + def image_max_buffer_size(self): pass + def image_max_array_size(self): pass -#ATOMICS +# ATOMICS @abstractmethod def has_global_int32_atomics(self): pass + @abstractmethod def has_global_int64_atomics(self): pass + @abstractmethod def has_global_float32_atomics(self): pass + @abstractmethod def has_global_float64_atomics(self): pass @@ -270,12 +313,15 @@ class LogicalDevice(object, metaclass=ABCMeta): @abstractmethod def has_local_int32_atomics(self): pass + @abstractmethod def has_local_int64_atomics(self): pass + @abstractmethod def has_local_float32_atomics(self): pass + @abstractmethod def has_local_float64_atomics(self): pass @@ -283,12 +329,15 @@ class LogicalDevice(object, metaclass=ABCMeta): @abstractmethod def has_mixed_int32_atomics(self): pass + @abstractmethod def has_mixed_int64_atomics(self): pass + @abstractmethod def has_mixed_float32_atomics(self): pass + @abstractmethod def has_mixed_float64_atomics(self): pass @@ -296,6 +345,7 @@ class LogicalDevice(object, metaclass=ABCMeta): @abstractmethod def has_int32_hardware_atomic_counters(self): pass + @abstractmethod def has_int64_hardware_atomic_counters(self): pass @@ -303,9 +353,11 @@ class LogicalDevice(object, metaclass=ABCMeta): @abstractmethod def preferred_platform_atomic_alignment(self): pass + @abstractmethod def preferred_local_atomic_alignment(self): pass + @abstractmethod def preferred_global_atomic_alignment(self): pass @@ -314,6 +366,7 @@ class LogicalDevice(object, metaclass=ABCMeta): @abstractmethod def has_profiling_support(self): pass + @abstractmethod def profiling_time_resolution(self): pass @@ -322,6 +375,7 @@ class LogicalDevice(object, metaclass=ABCMeta): @abstractmethod def has_printf_support(self): pass + @abstractmethod def printf_buffer_size(self): pass @@ -330,18 +384,23 @@ class LogicalDevice(object, metaclass=ABCMeta): @abstractmethod def has_gl_sharing(self): pass + @abstractmethod def has_gl_event_sharing(self): pass + @abstractmethod def has_gl_msaa_sharing(self): pass + @abstractmethod def has_dx9_sharing(self): pass + @abstractmethod def has_dx10_sharing(self): pass + @abstractmethod def has_dx11_sharing(self): pass diff --git a/hysop/backend/device/opencl/operator/directional/stretching_dir.py b/hysop/backend/device/opencl/operator/directional/stretching_dir.py index 478469e57..ae37a02b0 100644 --- a/hysop/backend/device/opencl/operator/directional/stretching_dir.py +++ b/hysop/backend/device/opencl/operator/directional/stretching_dir.py @@ -176,12 +176,12 @@ class OpenClDirectionalStretching(OpenClDirectionalOperator): vorticity_mesh_info = self.vorticity_mesh_info direction = self.splitting_direction - formulation = self.formulation + fomulation = self.formulation discretization = self.space_discretization time_integrator = self.time_integrator cl_env = self.cl_env - precision = self.precision + typegen = self.typegen build_options = self.build_options() autotuner_config = self.autotuner_config @@ -189,7 +189,7 @@ class OpenClDirectionalStretching(OpenClDirectionalOperator): total_work, per_work_statistic, cached_bytes) = \ DirectionalStretchingKernel.autotune( cl_env=cl_env, - precision=precision, + typegen=typegen, build_options=build_options, autotuner_config=autotuner_config, direction=direction, diff --git a/hysop/backend/host/fortran/operator/scales_advection.py b/hysop/backend/host/fortran/operator/scales_advection.py index 1b930d04e..930a48edf 100644 --- a/hysop/backend/host/fortran/operator/scales_advection.py +++ b/hysop/backend/host/fortran/operator/scales_advection.py @@ -14,7 +14,7 @@ from hysop.fields.cartesian_discrete_field import CartesianDiscreteTensorField from hysop.topology.cartesian_descriptor import CartesianTopologyDescriptors from hysop.parameters.scalar_parameter import ScalarParameter from hysop.methods import Interpolation, TimeIntegrator, Remesh, \ - MultiScaleInterpolation, StrangOrder + MultiScaleInterpolation, StrangOrder from hysop.numerics.remesh.remesh import RemeshKernel from hysop.tools.numpywrappers import npw from hysop.core.graph.graph import op_apply @@ -25,22 +25,22 @@ from hysop.backend.host.fortran.fortran_operator import FortranOperator class ScalesAdvection(FortranOperator): """Particle advection using Fortran Scales library.""" - __rmsh_to_scales__ = {# Mprime kernels - Remesh.Mp4: 'p_M4', - Remesh.Mp6: 'p_M6', - Remesh.Mp8: 'p_M8', - # Lambda kernels - Remesh.L2_1: 'p_M4', - Remesh.L4_2: 'p_M6', - Remesh.L4_4: 'p_44', - Remesh.L6_4: 'p_64', - Remesh.L6_6: 'p_66', - Remesh.L8_4: 'p_84', - # Corrected kernels - Remesh.O2: 'p_O2', - Remesh.O4: 'p_O4', - # Corrected and limited kernels - Remesh.L2: 'p_L2'} + __rmsh_to_scales__ = { # Mprime kernels + Remesh.Mp4: 'p_M4', + Remesh.Mp6: 'p_M6', + Remesh.Mp8: 'p_M8', + # Lambda kernels + Remesh.L2_1: 'p_M4', + Remesh.L4_2: 'p_M6', + Remesh.L4_4: 'p_44', + Remesh.L6_4: 'p_64', + Remesh.L6_6: 'p_66', + Remesh.L8_4: 'p_84', + # Corrected kernels + Remesh.O2: 'p_O2', + Remesh.O4: 'p_O4', + # Corrected and limited kernels + Remesh.L2: 'p_L2'} __interpol_to_scales = {Interpolation.LINEAR: 'lin', Interpolation.L4_4: 'L4_4', @@ -51,20 +51,20 @@ class ScalesAdvection(FortranOperator): StrangOrder.STRANG_SECOND_ORDER: 'strang'} __default_method = { - TimeIntegrator: RK2, - Interpolation: Interpolation.LINEAR, - MultiScaleInterpolation: Interpolation.LINEAR, - StrangOrder: StrangOrder.STRANG_SECOND_ORDER, - Remesh: Remesh.L2_1, - } + TimeIntegrator: RK2, + Interpolation: Interpolation.LINEAR, + MultiScaleInterpolation: Interpolation.LINEAR, + StrangOrder: StrangOrder.STRANG_SECOND_ORDER, + Remesh: Remesh.L2_1, + } __available_methods = { - TimeIntegrator: RK2, - Interpolation: Interpolation.LINEAR, - MultiScaleInterpolation: __interpol_to_scales.keys(), - StrangOrder: __dim_splitting_to_scales.keys(), - Remesh: __rmsh_to_scales__.keys(), - } + TimeIntegrator: RK2, + Interpolation: Interpolation.LINEAR, + MultiScaleInterpolation: __interpol_to_scales.keys(), + StrangOrder: __dim_splitting_to_scales.keys(), + Remesh: __rmsh_to_scales__.keys(), + } @classmethod def default_method(cls): @@ -80,13 +80,12 @@ class ScalesAdvection(FortranOperator): @debug def __new__(cls, velocity, - advected_fields_in, advected_fields_out, - variables, dt, **kwds): + advected_fields_in, advected_fields_out, + variables, dt, **kwds): return super(ScalesAdvection, cls).__new__(cls, - input_fields=None, output_fields=None, - input_params=None, output_params=None, - **kwds) - + input_fields=None, output_fields=None, + input_params=None, output_params=None, + **kwds) @debug def __init__(self, velocity, @@ -139,7 +138,7 @@ class ScalesAdvection(FortranOperator): is_inplace = True for (ifield, ofield) in zip(advected_fields_in, advected_fields_out): - input_fields[ifield] = variables[ifield] + input_fields[ifield] = variables[ifield] output_fields[ofield] = variables[ofield] if (ifield is ofield): assert is_inplace, 'Cannot mix inplace and out of place scales advection.' @@ -159,14 +158,14 @@ class ScalesAdvection(FortranOperator): raise NotImplementedError("Scales only implements 3D advection.") for field in self.fields: if (field.dtype != HYSOP_REAL): - msg='Scales only implements advection using precision speficied at ' - msg+='compile time (HYSOP_REAL={}) but field {} has dtype {}.' - msg=msg.format(HYSOP_REAL.__name__, field.name, field.dtype) + msg = 'Scales only implements advection using precision speficied at ' + msg += 'compile time (HYSOP_REAL={}) but field {} has dtype {}.' + msg = msg.format(HYSOP_REAL.__name__, field.name, field.dtype) raise RuntimeError(msg) self.velocity = velocity self.first_scalar = advected_fields_in[0].fields[0] - self.advected_fields_in = advected_fields_in + self.advected_fields_in = advected_fields_in self.advected_fields_out = advected_fields_out self.dt = dt @@ -232,56 +231,55 @@ class ScalesAdvection(FortranOperator): dvelocity = self.get_input_discrete_field(self.velocity) # Ravel all tensor fields to scalar fields and get corresponding discrete scalar fields - dadvected_fields_in = tuple(self.get_input_discrete_field(ifield) - for itfield in self.advected_fields_in - for ifield in itfield.fields) + dadvected_fields_in = tuple(self.get_input_discrete_field(ifield) + for itfield in self.advected_fields_in + for ifield in itfield.fields) dadvected_fields_out = tuple(self.get_output_discrete_field(ofield) - for otfield in self.advected_fields_out - for ofield in otfield.fields) + for otfield in self.advected_fields_out + for ofield in otfield.fields) assert len(dadvected_fields_in) == len(dadvected_fields_out) if is_inplace: assert all((din._dfield is dout._dfield) - for (din, dout) in zip(dadvected_fields_in, dadvected_fields_out)) + for (din, dout) in zip(dadvected_fields_in, dadvected_fields_out)) # check that every advected field has the same grid size and space step dS0 = self.get_input_discrete_field(self.first_scalar) for df in set(dadvected_fields_in + dadvected_fields_out): if any(df.space_step != dS0.space_step): - msg='Space step mismatch between discrete fields {} and {}.' - msg=msg.format(df.name, dS0.name) + msg = 'Space step mismatch between discrete fields {} and {}.' + msg = msg.format(df.name, dS0.name) raise ValueError(msg) if any(df.resolution != dS0.resolution): - msg='Resolution mismatch between discrete fields {} and {}.' - msg=msg.format(df.name, dS0.name) + msg = 'Resolution mismatch between discrete fields {} and {}.' + msg = msg.format(df.name, dS0.name) raise ValueError(msg) # The SCALES library for advection works only with # 3D 1-component scalars or 3-components vectors so we # merge advected scalars back to 3-component tensors while we can. nscalars = len(dadvected_fields_in) - assert nscalars>=1 + assert nscalars >= 1 dSin, dSout, all_buffers = (), (), () # 3-components fields for i in range(nscalars//3): - dfields_in = dadvected_fields_in[3*i:3*(i+1)] + dfields_in = dadvected_fields_in[3*i:3*(i+1)] dfields_out = dadvected_fields_out[3*i:3*(i+1)] sin = CartesianDiscreteTensorField.from_dfields(name='Sin{}'.format(i), dfields=dfields_in, shape=(3,)) sout = CartesianDiscreteTensorField.from_dfields(name='Sout{}'.format(i), - dfields=dfields_out, - shape=(3,)) + dfields=dfields_out, + shape=(3,)) buffers = dvelocity.buffers + sout.buffers assert all(b.flags.f_contiguous for b in buffers) - dSin += (sin,) + dSin += (sin,) dSout += (sout,) all_buffers += (buffers,) - # 1-components fields - dSin += dadvected_fields_in[(nscalars//3)*3:] + dSin += dadvected_fields_in[(nscalars//3)*3:] dSout += dadvected_fields_out[(nscalars//3)*3:] all_buffers += tuple(dvelocity.buffers+sout.buffers for sout in dadvected_fields_out[(nscalars//3)*3:]) assert len(dSin) == len(dSout) == len(all_buffers) >= 1 @@ -291,8 +289,8 @@ class ScalesAdvection(FortranOperator): is_bilevel = dvelocity.compute_resolution self.dvelocity = dvelocity - self.dS0 = dS0 - self.dSin = dSin + self.dS0 = dS0 + self.dSin = dSin self.dSout = dSout self.dadvected_fields_in = dadvected_fields_in self.dadvected_fields_out = dadvected_fields_out @@ -304,8 +302,8 @@ class ScalesAdvection(FortranOperator): super(ScalesAdvection, self).setup(work) v_topo = self.dvelocity.topology s_topo = self.dS0.topology - msg0="No ghosts allowed in Scales advection" - msg1="Scales is only for periodic domains." + msg0 = "No ghosts allowed in Scales advection" + msg1 = "Scales is only for periodic domains." assert (v_topo.ghosts == 0).all(), msg0 assert v_topo.mesh.periodicity.all(), msg1 for (dfi, dfo) in zip(self.dadvected_fields_in, @@ -361,11 +359,10 @@ class ScalesAdvection(FortranOperator): else: scales_func.append(scales.solve_advection) else: - msg='Scales only can only handle 3D advected field with one or three components.' + msg = 'Scales only can only handle 3D advected field with one or three components.' raise NotImplementedError(msg) self._scales_func = tuple(scales_func) - @op_apply def apply(self, **kwds): """Solve advection using Fortran SCALES library diff --git a/hysop/backend/host/host_array_backend.py b/hysop/backend/host/host_array_backend.py index ca2592e4d..4e554ba77 100644 --- a/hysop/backend/host/host_array_backend.py +++ b/hysop/backend/host/host_array_backend.py @@ -193,7 +193,7 @@ class HostArrayBackend(ArrayBackend): if dtype == np.bool_: dtype = HYSOP_BOOL - import warning + import warnings msg = 'HostArrayBackend: numpy bool array converted to hysop_bool={}.'.format(dtype) warnings.warn(msg, HysopWarning) diff --git a/hysop/backend/host/host_operator.py b/hysop/backend/host/host_operator.py index cd20d2223..7b5406ef2 100644 --- a/hysop/backend/host/host_operator.py +++ b/hysop/backend/host/host_operator.py @@ -59,7 +59,7 @@ class OpenClMappedMemoryObjectGetter(object): self.__evt = evt def __getitem__(self, key): - return obj.get_mapped_object(key=key) + return self.__obj.get_mapped_object(key=key) @property def evt(self): @@ -83,30 +83,30 @@ class OpenClMappable(object): # enforce opencl topology on host operator for (field, topo_descriptor) in self.input_fields.items(): topo_descriptor = TopologyDescriptor.build_descriptor( - backend=Backend.OPENCL, - operator=self, - field=field, - handle=topo_descriptor, - cl_env=self.cl_env) + backend=Backend.OPENCL, + operator=self, + field=field, + handle=topo_descriptor, + cl_env=self.cl_env) self.input_fields[field] = topo_descriptor for (field, topo_descriptor) in self.output_fields.items(): topo_descriptor = TopologyDescriptor.build_descriptor( - backend=Backend.OPENCL, - operator=self, - field=field, - handle=topo_descriptor, - cl_env=self.cl_env) + backend=Backend.OPENCL, + operator=self, + field=field, + handle=topo_descriptor, + cl_env=self.cl_env) self.output_fields[field] = topo_descriptor else: super(OpenClMappable, self).create_topology_descriptors() def __new__(cls, cl_env=None, mpi_params=None, - enable_opencl_host_buffer_mapping=False, **kwds): + enable_opencl_host_buffer_mapping=False, **kwds): return super(OpenClMappable, cls).__new__(cls, mpi_params=mpi_params, **kwds) def __init__(self, cl_env=None, mpi_params=None, - enable_opencl_host_buffer_mapping=False, **kwds): + enable_opencl_host_buffer_mapping=False, **kwds): if enable_opencl_host_buffer_mapping: msg = 'OpenClMappable is an interface dedicated to extend HostOperator.' @@ -126,7 +126,7 @@ class OpenClMappable(object): self.__mapped = False self.__registered_objects = {} self.__registered_getters = {} - self.__mapped_objects = {} + self.__mapped_objects = {} def __del__(self): self.unmap_objects(force=True) @@ -145,8 +145,8 @@ class OpenClMappable(object): def _register_fields(self): from hysop.fields.discrete_field import DiscreteScalarField, DiscreteScalarFieldView - ivfields = set(filter(lambda f: f.backend.kind==Backend.OPENCL, self.input_discrete_fields.values())) - ovfields = set(filter(lambda f: f.backend.kind==Backend.OPENCL, self.output_discrete_fields.values())) + ivfields = set(filter(lambda f: f.backend.kind == Backend.OPENCL, self.input_discrete_fields.values())) + ovfields = set(filter(lambda f: f.backend.kind == Backend.OPENCL, self.output_discrete_fields.values())) check_instance(ivfields, set, values=DiscreteScalarFieldView) check_instance(ovfields, set, values=DiscreteScalarFieldView) vfields = ivfields.union(ovfields) @@ -169,40 +169,40 @@ class OpenClMappable(object): flags |= cl.map_flags.WRITE assert (field._data is not None) self.register_mappable_object(key=field, obj=field._data.handle, - flags=flags) + flags=flags) for vfield in vfields: self.register_data_getter(get_key=vfield, obj_key=vfield.dfield, - getter=vfield._compute_data_view) + getter=vfield._compute_data_view) def register_mappable_object(self, key, obj, flags): from hysop.backend.device.opencl import clArray - msg='Device memory object "{}" has already been registered.' - msg=msg.format(key) + msg = 'Device memory object "{}" has already been registered.' + msg = msg.format(key) assert (key not in self.__registered_objects), msg check_instance(obj, clArray.Array) self.__registered_objects[key] = (obj, flags) def register_data_getter(self, get_key, obj_key, getter): assert callable(getter) - msg='Device memory getter "{}" has already been registered as an object.' - msg=msg.format(get_key) + msg = 'Device memory getter "{}" has already been registered as an object.' + msg = msg.format(get_key) assert (get_key not in self.__registered_objects), msg - msg='Device memory getter "{}" has already been registered as a getter.' - msg=msg.format(get_key) + msg = 'Device memory getter "{}" has already been registered as a getter.' + msg = msg.format(get_key) assert (get_key not in self.__registered_getters), msg - msg='Device memory object "{}" has not been registered.' - msg=msg.format(obj_key) + msg = 'Device memory object "{}" has not been registered.' + msg = msg.format(obj_key) assert (obj_key in self.__registered_objects), msg self.__registered_getters[get_key] = (obj_key, getter) def map_objects(self, queue, is_blocking): - DEBUG=False - msg='Device memory objects have already been mapped to host.' + DEBUG = False + msg = 'Device memory objects have already been mapped to host.' assert not self.__mapped, msg evt = None for (obj_key, (dev_buf, flags)) in self.__registered_objects.items(): if DEBUG: - msg='Mapping {}...'.format(obj_key.full_tag) + msg = 'Mapping {}...'.format(obj_key.full_tag) print(msg) if is_blocking: host_buf = dev_buf.map_to_host(queue=queue, is_blocking=is_blocking, flags=flags) @@ -211,29 +211,30 @@ class OpenClMappable(object): self.__mapped_objects[obj_key] = host_buf for (get_key, (obj_key, getter)) in self.__registered_getters.items(): if DEBUG: - msg='Applying getter {} to mapped buffer {}...'.format(get_key.full_tag, obj_key.full_tag) + msg = 'Applying getter {} to mapped buffer {}...'.format(get_key.full_tag, obj_key.full_tag) print(msg) self.__mapped_objects[get_key] = getter(self.__mapped_objects[obj_key]) self.__mapped = True return evt def unmap_objects(self, force=False): - msg='Device memory objects have already been unmapped from host.' + msg = 'Device memory objects have already been unmapped from host.' assert force or self.__mapped, msg self.__mapped_objects.clear() self.__mapped = False def get_mapped_object(self, key): - msg='Device memory objects have not been mapped to host yet.' + msg = 'Device memory objects have not been mapped to host yet.' assert self.__mapped, msg - msg='Device memory object "{}" has not been mapped.' - msg=msg.format(key) + msg = 'Device memory object "{}" has not been mapped.' + msg = msg.format(key) assert key in self.__mapped_objects, msg return self.__mapped_objects[key] def build_object_getter(self, key): - msg='Device memory object "{}" has not been registered.' - msg=msg.format(key) + import functools + msg = 'Device memory object "{}" has not been registered.' + msg = msg.format(key) assert key in self.__registered_objects, msg return functools.partial(self.get_mapped_object, key=key) @@ -254,4 +255,3 @@ class OpenClMappable(object): yield except: raise - diff --git a/hysop/backend/host/python/operator/directional/advection_dir.py b/hysop/backend/host/python/operator/directional/advection_dir.py index 39f37f0d4..254233e09 100644 --- a/hysop/backend/host/python/operator/directional/advection_dir.py +++ b/hysop/backend/host/python/operator/directional/advection_dir.py @@ -1,5 +1,5 @@ from hysop.tools.numpywrappers import npw -from hysop.tools.decorators import debug +from hysop.tools.decorators import debug from hysop.tools.types import check_instance, to_tuple from hysop.core.graph.graph import op_apply from hysop.core.memory.memory_request import MemoryRequest @@ -14,6 +14,7 @@ from hysop.numerics.odesolvers.runge_kutta import ExplicitRungeKutta, Euler, RK2 DEBUG = False + class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperator): counter = 0 @@ -29,7 +30,7 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat def get_work_properties(self): requests = super(PythonDirectionalAdvection, self).get_work_properties() - V = self.dvelocity + V = self.dvelocity Vr = self.relative_velocity check_instance(V, DiscreteScalarFieldView) check_instance(Vr, float) @@ -37,21 +38,21 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat cr = self.compute_granularity shape = tuple(self.dvelocity.compute_resolution[cr:]) - nbuffers = { Euler:0, RK2:2, RK3:3, RK4:4 } + nbuffers = {Euler: 0, RK2: 2, RK3: 3, RK4: 4} time_integrator = self.time_integrator nb_rcomponents = max(nbuffers[time_integrator], 2) nb_icomponents = 1 if (time_integrator is Euler) else 2 request = MemoryRequest.empty_like(a=V, shape=shape, - nb_components=nb_rcomponents) + nb_components=nb_rcomponents) requests.push_mem_request('rtmp', request) request = MemoryRequest.empty_like(a=V, shape=shape, - nb_components=nb_icomponents, dtype=npw.int32) + nb_components=nb_icomponents, dtype=npw.int32) requests.push_mem_request('itmp', request) request = MemoryRequest.empty_like(a=V, shape=shape[:-1]+(1,), - nb_components=1, dtype=npw.int32) + nb_components=1, dtype=npw.int32) requests.push_mem_request('ixtmp', request) nscalars = sum(field.nb_components for field in self.advected_fields_in) @@ -59,7 +60,7 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat rg = self.remesh_ghosts sshape = shape[:-1] + (shape[-1]+2*rg,) request = MemoryRequest.empty_like(a=V, shape=sshape, - nb_components=nscalars) + nb_components=nscalars) requests.push_mem_request('stmp', request) self.nscalars = nscalars self.Vr = Vr @@ -76,22 +77,22 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat self.dstmp = work.get_buffer(self, 'stmp', handle=True) if self.is_bilevel: - msg="Python bilevel advection has not been implemented yet." + msg = "Python bilevel advection has not been implemented yet." raise NotImplementedError(msg) self._prepare_apply() def _prepare_apply(self): - cr = self.compute_granularity - sinputs = self.advected_fields_in - soutputs = self.advected_fields_out - dsinputs = self.dadvected_fields_in + cr = self.compute_granularity + sinputs = self.advected_fields_in + soutputs = self.advected_fields_out + dsinputs = self.dadvected_fields_in dsoutputs = self.dadvected_fields_out velo_mesh = self.dvelocity.mesh velo_mesh_iterator = velo_mesh.build_compute_mesh_iterator(cr) - X0 = velo_mesh.local_compute_coords[-1] - dx = velo_mesh.space_step[-1] + X0 = velo_mesh.local_compute_coords[-1] + dx = velo_mesh.space_step[-1] inv_dx = (1.0/dx) velo_compute_view = velo_mesh.local_compute_slices self._velocity_mesh_attributes = (velo_mesh_iterator, dx, inv_dx, velo_compute_view, X0) @@ -99,9 +100,9 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat dsinputs0 = next(iter(dsinputs.values())) scalar_mesh = dsinputs0.mesh scalar_mesh_iterator = scalar_mesh.build_compute_mesh_iterator(cr) - N0 = scalar_mesh.global_start[-1] - sdx = scalar_mesh.space_step[-1] - inv_sdx = (1.0/sdx) + N0 = scalar_mesh.global_start[-1] + sdx = scalar_mesh.space_step[-1] + inv_sdx = (1.0/sdx) scalar_compute_view = scalar_mesh.local_compute_slices self._scalar_mesh_attributes = (scalar_mesh_iterator, sdx, inv_sdx, scalar_compute_view, N0) @@ -112,8 +113,8 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat in_ghosts, out_ghosts = {}, {} in_shapes, out_shapes = {}, {} - for (ifield,ofield) in zip(sinputs, soutputs): - Sin = dsinputs[ifield] + for (ifield, ofield) in zip(sinputs, soutputs): + Sin = dsinputs[ifield] Sout = dsoutputs[ofield] in_compute_slices[Sin] = Sin.compute_slices in_ghosts[Sin] = Sin.ghosts @@ -129,8 +130,8 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat def apply(self, simulation=None, debug_dumper=None, **kwds): super(PythonDirectionalAdvection, self).apply(**kwds) - dsoutputs = self.dadvected_fields_out - dt = self.dt() * self.dt_coeff + dsoutputs = self.dadvected_fields_out + dt = self.dt() * self.dt_coeff if DEBUG: import inspect @@ -138,15 +139,15 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat def dump(dfield, tag): it = simulation.current_iteration t = simulation.t() - _file,_line = inspect.stack()[1][1:3] + _file, _line = inspect.stack()[1][1:3] debug_dumper(it, t, tag, tuple(df.sdata.get().handle[df.compute_slices] - for df in dfield.dfields), description=None) + for df in dfield.dfields), description=None) else: def dump(*args, **kwds): pass - Sin = next(iter(self.dadvected_fields_in.values())) + Sin = next(iter(self.dadvected_fields_in.values())) Sout = next(iter(self.dadvected_fields_out.values())) - P = self.dposition + P = self.dposition print('DT= {}'.format(dt)) self._compute_advection(dt) print('P') @@ -157,7 +158,7 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat dump(Sin, 'Sin before remesh') self._compute_remesh() print('S (before accumulation)') - print(Sout[0].sbuffer[Sout[0].local_slices(ghosts=(0,self.remesh_ghosts))]) + print(Sout[0].sbuffer[Sout[0].local_slices(ghosts=(0, self.remesh_ghosts))]) dump(Sin, 'Sout (after remesh)') for sout in dsoutputs.values(): print('Accumulate {}'.format(sout.short_description())) @@ -216,7 +217,7 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat ridx[...] %= N # Vout = Vl + alpha*(Vr-Vl) - Vout[...] = Vin[Iy+(ridx,)] + Vout[...] = Vin[Iy+(ridx,)] Vout[...] -= Vin[Iy+(lidx,)] Vout[...] *= alpha @@ -226,70 +227,70 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat Vout[...] -= Vr def _compute_advection(self, dt): - P = self.dpos - Vd = self.dvelocity.sbuffer + P = self.dpos + Vd = self.dvelocity.sbuffer rtmp = self.drtmp itmp = self.ditmp cr = self.compute_granularity is_periodic = self.is_periodic - rk_scheme = self.time_integrator + rk_scheme = self.time_integrator - (mesh_it,dx,inv_dx,view,X0) = self._velocity_mesh_attributes + (mesh_it, dx, inv_dx, view, X0) = self._velocity_mesh_attributes _Vd = Vd[view] - Vd = Vd[view[:-1]+(slice(None),)] - Vr = self.Vr # relative velocity + Vd = Vd[view[:-1]+(slice(None),)] + Vr = self.Vr # relative velocity if DEBUG: # check if CFL condition is met cfl = self.velocity_cfl Vmin, Vmax = Vd.min(), Vd.max() Vinf = max(abs(Vmin), abs(Vmax)) - msg='Vinf={}, dt={}, dx={} => Vinf*dt/dx={} but cfl={}.' - msg=msg.format(Vinf, dt, dx, Vinf*dt*inv_dx, cfl) + msg = 'Vinf={}, dt={}, dx={} => Vinf*dt/dx={} but cfl={}.' + msg = msg.format(Vinf, dt, dx, Vinf*dt*inv_dx, cfl) assert Vinf*dt*inv_dx <= cfl, msg # fill position with first direction coordinates on the whole compute domain - for (idx,_,I,Ig) in mesh_it.iter_compute_mesh(): + for (idx, _, I, Ig) in mesh_it.iter_compute_mesh(): Pi = P[idx] - Vi = Vd[idx] - _Vi = _Vd[idx] # Vi without ghosts + Vi = Vd[idx] + _Vi = _Vd[idx] # Vi without ghosts if rk_scheme.name() == 'Euler': - Pi[...] = _Vi - Vr + Pi[...] = _Vi - Vr Pi[...] *= dt Pi[...] += X0 elif rk_scheme.name() == 'RK2': - (lidx,ridx) = itmp - (dX0,V1) = rtmp - dX0[...] = _Vi - Vr + (lidx, ridx) = itmp + (dX0, V1) = rtmp + dX0[...] = _Vi - Vr dX0[...] *= (0.5*dt) - self._interp_velocity(Vi,V1,dX0,I,Ig,lidx,ridx,inv_dx,is_periodic,Vr) - Pi[...] = V1 + self._interp_velocity(Vi, V1, dX0, I, Ig, lidx, ridx, inv_dx, is_periodic, Vr) + Pi[...] = V1 Pi[...] *= dt Pi[...] += X0 elif rk_scheme.name() == 'RK4': - (lidx,ridx) = itmp - (dXk,V1,V2,V3) = rtmp + (lidx, ridx) = itmp + (dXk, V1, V2, V3) = rtmp dXk[...] = _Vi - Vr dXk[...] *= (0.5*dt) - self._interp_velocity(Vi,V1,dXk,I,Ig,lidx,ridx,inv_dx,is_periodic,Vr) + self._interp_velocity(Vi, V1, dXk, I, Ig, lidx, ridx, inv_dx, is_periodic, Vr) dXk[...] = V1 dXk[...] *= (0.5*dt) - self._interp_velocity(Vi,V2,dXk,I,Ig,lidx,ridx,inv_dx,is_periodic,Vr) + self._interp_velocity(Vi, V2, dXk, I, Ig, lidx, ridx, inv_dx, is_periodic, Vr) dXk[...] = V2 dXk[...] *= (1.0*dt) - self._interp_velocity(Vi,V3,dXk,I,Ig,lidx,ridx,inv_dx,is_periodic,Vr) + self._interp_velocity(Vi, V3, dXk, I, Ig, lidx, ridx, inv_dx, is_periodic, Vr) V0 = dXk - V0[...] = _Vi - Vr + V0[...] = _Vi - Vr V0[...] *= (1.0/6.0) V1[...] *= (2.0/6.0) V2[...] *= (2.0/6.0) V3[...] *= (1.0/6.0) - Pi[...] = V0 + Pi[...] = V0 Pi[...] += V1 Pi[...] += V2 Pi[...] += V3 @@ -304,30 +305,30 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat finfo = npw.finfo(P.dtype) eps, epsneg = finfo.eps, finfo.epsneg - msg='Pmin={}, X0[0]={}, Vmin={}, dt={}, X0[0]+Vmin*dt={} Pmin < X0[0] + Vmin*dt' - msg=msg.format(Pmin, X0[0], Vmin, dt, X0[0]+Vmin*dt) + msg = 'Pmin={}, X0[0]={}, Vmin={}, dt={}, X0[0]+Vmin*dt={} Pmin < X0[0] + Vmin*dt' + msg = msg.format(Pmin, X0[0], Vmin, dt, X0[0]+Vmin*dt) assert Pmin >= X0[0] + Vmin*dt - 10*epsneg, msg - msg='Pmax={}, X0[-1]={}, Vmax={}, dt={}, X0[-1]+Vmax*dt={} Pmax > X0[-1] + Vmax*dt' - msg=msg.format(Pmax, X0[-1], Vmax, dt, X0[-1]+Vmax*dt) + msg = 'Pmax={}, X0[-1]={}, Vmax={}, dt={}, X0[-1]+Vmax*dt={} Pmax > X0[-1] + Vmax*dt' + msg = msg.format(Pmax, X0[-1], Vmax, dt, X0[-1]+Vmax*dt) assert Pmax <= X0[-1] + Vmax*dt + 10*eps, msg def _compute_remesh(self): - pos = self.dpos - sinputs = self.advected_fields_in - soutputs = self.advected_fields_out - dsinputs = self.dadvected_fields_in - dsoutputs = self.dadvected_fields_out + pos = self.dpos + sinputs = self.advected_fields_in + soutputs = self.advected_fields_out + dsinputs = self.dadvected_fields_in + dsoutputs = self.dadvected_fields_out is_inplace = self.is_inplace - rtmp = self.drtmp - itmp = self.ditmp + rtmp = self.drtmp + itmp = self.ditmp if self.is_inplace: - stmp = self.dstmp + stmp = self.dstmp ixtmp = self.dixtmp - cr = self.compute_granularity - (_,_,_,_,X0) = self._velocity_mesh_attributes + cr = self.compute_granularity + (_, _, _, _, X0) = self._velocity_mesh_attributes (mesh_it, dx, inv_dx, compute_view, N0) = self._scalar_mesh_attributes if DEBUG: @@ -343,41 +344,41 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat is_periodic = self.is_periodic - R0,R1 = rtmp[:2] + R0, R1 = rtmp[:2] I0 = itmp[0] Ix = ixtmp[0] if is_inplace: S = stmp is_periodic = False - N = R0.shape[-1] + N = R0.shape[-1] (in_compute_slices, out_compute_slices) = self._inout_compute_slices (in_ghosts, out_ghosts) = self._inout_ghosts (in_shapes, out_shapes) = self._inout_shapes - input_buffer_views = {} + input_buffer_views = {} output_buffer_views = {} last_scalar_axe_mesh_indices = self._last_scalar_axe_mesh_indices pos[...] -= X0[0] - for (idx,_,I,_) in mesh_it.iter_compute_mesh(): + for (idx, _, I, _) in mesh_it.iter_compute_mesh(): Pi = pos[idx] - R0[...] = Pi + R0[...] = Pi R0[...] *= inv_dx - I0[...] = npw.floor(R0).astype(I0.dtype) + I0[...] = npw.floor(R0).astype(I0.dtype) R0[...] -= I0 - R0[...] *= -1 # we need -alpha for the left point + R0[...] *= -1 # we need -alpha for the left point if DEBUG: Imin, Imax = I0.min(), I0.max() amin, amax = R0.min(), R0.max() assert (Imin >= -scalar_advection_ghosts), '{} >= -{}'.format(Imin, - scalar_advection_ghosts) - assert (Imax < N+scalar_advection_ghosts), '{} < {}+{}'.format(Imax, N, - scalar_advection_ghosts) + scalar_advection_ghosts) + assert (Imax < N+scalar_advection_ghosts), '{} < {}+{}'.format(Imax, N, + scalar_advection_ghosts) assert (amin >= -1.0), amin assert (amax <= 0.0), amax @@ -386,18 +387,18 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat # prebuild data views for ifield, ofield in zip(sinputs, soutputs): - Sin = dsinputs[ifield] + Sin = dsinputs[ifield] Sout = dsoutputs[ofield] - sin_view = tuple(idx[i] + in_ghosts[Sin][i] for i in range(cr)) - sin_view += in_compute_slices[Sin][cr:] + sin_view = tuple(idx[i] + in_ghosts[Sin][i] for i in range(cr)) + sin_view += in_compute_slices[Sin][cr:] dG = out_ghosts[Sout][-1] - remesh_ghosts sout_view = tuple(idx[i] + out_ghosts[Sout][i] for i in range(cr)) sout_view += out_compute_slices[Sout][cr:-1] sout_view += (slice(dG, out_shapes[Sout][-1]-dG),) - in_views = tuple(buf[sin_view] for buf in Sin.buffers) + in_views = tuple(buf[sin_view] for buf in Sin.buffers) out_views = tuple(buf[sout_view] for buf in Sout.buffers) input_buffer_views[ifield] = in_views output_buffer_views[ofield] = out_views @@ -407,13 +408,13 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat R0[...] += 1 R1[...] = remesh_kernel.gamma(R0) - sid=0 + sid = 0 for ifield, ofield in zip(sinputs, soutputs): for k in range(ifield.nb_components): - sin = input_buffer_views[ifield][k] + sin = input_buffer_views[ifield][k] sout = output_buffer_views[ofield][k] Si = (S[sid] if is_inplace else sout) - if (q==-P+1): + if (q == -P+1): Si[...] = 0.0 for in_idx in last_scalar_axe_mesh_indices: Ix[...] = I0[..., in_idx] @@ -422,33 +423,34 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat Ix += N Ix %= N if DEBUG: - assert npw.all(Ix>=0), 'ix={}, ix < 0'.format(ix) + assert npw.all(Ix >= 0), 'ix={}, ix < 0'.format(Ix) Si[I[:-1]+(Ix,)] += (R1[..., in_idx]*sin[..., in_idx]) - if (q==P) and is_inplace: + if (q == P) and is_inplace: sout[...] = Si - sid+=1 + sid += 1 @debug def handle_method(self, method): super(PythonDirectionalAdvection, self).handle_method(method) cr = method.pop(ComputeGranularity) assert 0 <= cr <= self.velocity.dim-1 - msg='Interpolation {}.{} is not supported for operator {}.'.format( - self.interp.__class__.__name__, self.interp, self.__class__.__name__) + msg = 'Interpolation {}.{} is not supported for operator {}.'.format( + self.interp.__class__.__name__, self.interp, self.__class__.__name__) assert self.interp in (Interpolation.LINEAR, PolynomialInterpolation.LINEAR), msg self.compute_granularity = cr @classmethod def default_method(cls): - dm0 = super(PythonDirectionalAdvection, cls).default_method() - dm = {ComputeGranularity : 0} + dm0 = super(PythonDirectionalAdvection, cls).default_method() + dm = {ComputeGranularity: 0} dm.update(dm0) return dm + @classmethod def available_methods(cls): - dm0 = super(PythonDirectionalAdvection, cls).available_methods() - dm = { ComputeGranularity: (0,)+cls.supported_dimensions() } + dm0 = super(PythonDirectionalAdvection, cls).available_methods() + dm = {ComputeGranularity: (0,)+cls.supported_dimensions()} dm.update(dm0) return dm diff --git a/hysop/backend/host/python/operator/spatial_filtering.py b/hysop/backend/host/python/operator/spatial_filtering.py index 77d23cef7..4e82cbe19 100644 --- a/hysop/backend/host/python/operator/spatial_filtering.py +++ b/hysop/backend/host/python/operator/spatial_filtering.py @@ -1,4 +1,3 @@ - import numpy as np from hysop.tools.types import check_instance, first_not_None from hysop.tools.decorators import debug @@ -8,11 +7,11 @@ from hysop.fields.continuous_field import Field from hysop.parameters.parameter import Parameter from hysop.topology.cartesian_descriptor import CartesianTopologyDescriptors from hysop.operator.base.spatial_filtering import ( - PolynomialInterpolationFilterBase, - RemeshRestrictionFilterBase, - SpectralRestrictionFilterBase, - SubgridRestrictionFilterBase, - PolynomialRestrictionFilterBase) + PolynomialInterpolationFilterBase, + RemeshRestrictionFilterBase, + SpectralRestrictionFilterBase, + SubgridRestrictionFilterBase, + PolynomialRestrictionFilterBase) class PythonPolynomialInterpolationFilter(PolynomialInterpolationFilterBase, HostOperator): @@ -27,16 +26,16 @@ class PythonPolynomialInterpolationFilter(PolynomialInterpolationFilterBase, Hos def apply(self, **kwds): """Apply analytic formula.""" super(PythonPolynomialInterpolationFilter, self).apply(**kwds) - fin = self.fin - fout = self.fout + fin = self.fin + fout = self.fout periodicity = self.dFin.periodicity - gr, n = self.subgrid_interpolator.gr, self.subgrid_interpolator.n - Wr = self.Wr + gr, n = self.subgrid_interpolator.gr, self.subgrid_interpolator.n + Wr = self.Wr for idx in np.ndindex(*self.iter_shape): - oslc = tuple(slice(j*gr[i], (j+1)*gr[i], 1) for i,j in enumerate(idx)) + oslc = tuple(slice(j*gr[i], (j+1)*gr[i], 1) for i, j in enumerate(idx)) islc = tuple(slice(periodicity[i]+j, periodicity[i]+j+n[i], 1) - for i,j in enumerate(idx)) + for i, j in enumerate(idx)) fout[oslc] = Wr.dot(fin[islc].ravel()).reshape(gr) self.dFout.exchange_ghosts() @@ -49,20 +48,20 @@ class PythonPolynomialRestrictionFilter(PolynomialRestrictionFilterBase, HostOpe super(PythonPolynomialRestrictionFilter, self).discretize(**kwds) SR = self.subgrid_restrictor self.Rr = SR.Rr.astype(self.dtype) / SR.GR - assert (self.Rr.shape == tuple(2*gi+1 for gi in SR.ghosts)), Rr.shape + assert (self.Rr.shape == tuple(2*gi+1 for gi in SR.ghosts)), self.Rr.shape @op_apply def apply(self, **kwds): """Apply analytic formula.""" super(PythonPolynomialRestrictionFilter, self).apply(**kwds) - fin = self.fin - fout = self.fout - gr = self.subgrid_restrictor.gr - Rr = self.Rr + fin = self.fin + fout = self.fout + gr = self.subgrid_restrictor.gr + Rr = self.Rr rshape = Rr.shape for idx in np.ndindex(*self.iter_shape): - islc = tuple(slice(j*gr[i], j*gr[i]+rshape[i], 1) for i,j in enumerate(idx)) + islc = tuple(slice(j*gr[i], j*gr[i]+rshape[i], 1) for i, j in enumerate(idx)) fout[idx] = (Rr*fin[islc]).sum() self.dFout.exchange_ghosts() @@ -75,13 +74,13 @@ class PythonRemeshRestrictionFilter(RemeshRestrictionFilterBase, HostOperator): def setup(self, **kwds): super(PythonRemeshRestrictionFilter, self).setup(**kwds) - fin = self.fin + fin = self.fin iratio = self.iratio oshape = self.fout.shape dviews = () for (idx, Wi) in self.nz_weights.items(): - slc = tuple(slice(i, i+r*s, r) for (i,s,r) in zip(idx, oshape, iratio)) + slc = tuple(slice(i, i+r*s, r) for (i, s, r) in zip(idx, oshape, iratio)) dviews += ((Wi, fin[slc]),) self.data_views = dviews @@ -136,4 +135,3 @@ class PythonSubgridRestrictionFilter(SubgridRestrictionFilterBase, HostOperator) super(PythonSubgridRestrictionFilter, self).apply(**kwds) self.fout[...] = self.fin[...] self.dFout.exchange_ghosts() - diff --git a/hysop/core/arrays/array.py b/hysop/core/arrays/array.py index 1c6dcac26..c6de8c68b 100644 --- a/hysop/core/arrays/array.py +++ b/hysop/core/arrays/array.py @@ -1,7 +1,7 @@ import numpy as np from abc import ABCMeta, abstractmethod -from hysop.constants import MemoryOrdering -from hysop.constants import DirectionLabels, default_order +from hysop.constants import MemoryOrdering +from hysop.constants import DirectionLabels, default_order from hysop.tools.misc import prod from hysop.tools.types import check_instance from hysop.tools.numpywrappers import slices_empty @@ -55,10 +55,10 @@ class Array(object, metaclass=ABCMeta): from hysop.core.arrays.all import ArrayBackend check_instance(backend, ArrayBackend) - super(Array,self).__init__(**kwds) + super(Array, self).__init__(**kwds) - self._handle=handle - self._backend=backend + self._handle = handle + self._backend = backend if hasattr(handle, '__len__'): setattr(self, '__len__', handle.__len__) @@ -68,8 +68,8 @@ class Array(object, metaclass=ABCMeta): Return underlying implementation of this buffer. """ if not hasattr(self, '_handle'): - msg='{} has no _handle defined.' - msg=msg.format(self.__class__) + msg = '{} has no _handle defined.' + msg = msg.format(self.__class__) raise RuntimeError(msg) return self._handle @@ -78,27 +78,30 @@ class Array(object, metaclass=ABCMeta): Return the backend corresponding to this array. """ if not hasattr(self, '_backend'): - msg='{} has no _backend defined.' - msg=msg.format(self.__class__) + msg = '{} has no _backend defined.' + msg = msg.format(self.__class__) raise RuntimeError(msg) return self._backend - handle = property(get_handle) + handle = property(get_handle) backend = property(get_backend) def __int__(self): """Return scalar value as an int.""" - assert self.size==1 + assert self.size == 1 return int(self.get()) + def __float__(self): - assert self.size==1 + assert self.size == 1 """Return scalar value as a float.""" - assert self.size==1 + assert self.size == 1 return float(self.get()) + def __complex__(self): """Return scalar value as a complex.""" - assert self.size==1 + assert self.size == 1 return complex(self.get()) + def __nonzero__(self): """Called to implement truth value testing and the built-in operation bool().""" if self.shape == (): @@ -109,15 +112,15 @@ class Array(object, metaclass=ABCMeta): @classmethod def _not_implemented_yet(cls, funname): msg = '{}::{} has not been implemented yet.' - msg=msg.format(cls.__name__, funname) + msg = msg.format(cls.__name__, funname) raise NotImplementedError(msg) @classmethod def _unsupported_argument(cls, fname, argname, arg, default_value=None): if arg != default_value: msg = '{}::{}() has been implemented but argument \'{}\' is not ' - msg+= 'supported and should be set to {}.' - msg=msg.format(cls.__name__, fname, argname, default_value) + msg += 'supported and should be set to {}.' + msg = msg.format(cls.__name__, fname, argname, default_value) raise NotImplementedError(msg) def wrap(self, handle): @@ -254,7 +257,7 @@ class Array(object, metaclass=ABCMeta): """ Same as self.transpose(), except that self is returned if self.ndim < 2. """ - if self.ndim<2: + if self.ndim < 2: return self else: return self.transpose() @@ -277,28 +280,27 @@ class Array(object, metaclass=ABCMeta): """ return self.itemsize*self.size - # array properties to be (re)defined - ndim = property(get_ndim) - shape = property(get_shape, set_shape) - offset = property(get_offset) + ndim = property(get_ndim) + shape = property(get_shape, set_shape) + offset = property(get_offset) strides = property(get_strides) - data = property(get_data) - base = property(get_base) - dtype = property(get_dtype) + data = property(get_data) + base = property(get_base) + dtype = property(get_dtype) int_ptr = property(get_int_ptr) # optional array properties - flags = property(get_flags) - imag = property(get_imag) - real = property(get_real) - ctypes = property(get_ctypes) + flags = property(get_flags) + imag = property(get_imag) + real = property(get_real) + ctypes = property(get_ctypes) # deduced array properties, may be redefined - size = property(get_size) + size = property(get_size) itemsize = property(get_itemsize) - nbytes = property(get_nbytes) - T = property(get_T) + nbytes = property(get_nbytes) + T = property(get_T) @abstractmethod def get(self, handle=False): @@ -340,46 +342,45 @@ class Array(object, metaclass=ABCMeta): If dimension is one, default_order is returned. """ dim = self.ndim - if dim==1: + if dim == 1: return default_order else: axes = self.logical_axes() c_axes = np.arange(dim) - if (axes==c_axes).all(): + if (axes == c_axes).all(): return MemoryOrdering.C_CONTIGUOUS - elif (axes==c_axes[::-1]).all(): + elif (axes == c_axes[::-1]).all(): return MemoryOrdering.F_CONTIGUOUS else: return MemoryOrdering.OUT_OF_ORDER order = property(get_order) - def logical_axes(self): - """ - Logical axes state ids, in numpy convention, as a tuple. - Axe 0 is the slowest varying index, last axe is the fastest varying index. - ie 3D C-ordering is [2,1,0] - 3D fortran-ordering is [0,1,2] - - Thoses are the axes seen as a numpy view on memory, *only* strides are permutated for access, - Those axes are found by reverse argsorting the array strides, using a stable sorting algorithm. - - Logical permutations can be achieved through numpy-like routines present in ArrayBackend: - *np.transpose - *np.rollaxis - *np.moveaxis - *np.swapaxes - and Array property: - *Array.T - - See Array.physical_axes() for physical permutations of axes. - See Array.axes() for overall array permutation. - See https://docs.scipy.org/doc/numpy-1.10.0/reference/internals.html for more information - about C versus Fortran ordering in numpy. - """ - strides = np.asarray(self.strides, dtype=np.int64) - axes = np.argsort(-strides, kind='mergesort') - return tuple(axes.tolist()) + """ + Logical axes state ids, in numpy convention, as a tuple. + Axe 0 is the slowest varying index, last axe is the fastest varying index. + ie 3D C-ordering is [2,1,0] + 3D fortran-ordering is [0,1,2] + + Thoses are the axes seen as a numpy view on memory, *only* strides are permutated for access, + Those axes are found by reverse argsorting the array strides, using a stable sorting algorithm. + + Logical permutations can be achieved through numpy-like routines present in ArrayBackend: + *np.transpose + *np.rollaxis + *np.moveaxis + *np.swapaxes + and Array property: + *Array.T + + See Array.physical_axes() for physical permutations of axes. + See Array.axes() for overall array permutation. + See https://docs.scipy.org/doc/numpy-1.10.0/reference/internals.html for more information + about C versus Fortran ordering in numpy. + """ + strides = np.asarray(self.strides, dtype=np.int64) + axes = np.argsort(-strides, kind='mergesort') + return tuple(axes.tolist()) def is_fp(self): """ @@ -391,13 +392,13 @@ class Array(object, metaclass=ABCMeta): """ Return true if dimension is one or if current order is C_CONTIGUOUS. """ - return (self.ndim==1) or (self.order == MemoryOrdering.C_CONTIGUOUS) + return (self.ndim == 1) or (self.order == MemoryOrdering.C_CONTIGUOUS) def is_fortran_contiguous(self): """ Return true if dimension is one or if current order is F_CONTIGUOUS. """ - return (self.ndim==1) or (self.order == MemoryOrdering.F_CONTIGUOUS) + return (self.ndim == 1) or (self.order == MemoryOrdering.F_CONTIGUOUS) def is_hysop_contiguous(self): """ @@ -415,8 +416,7 @@ class Array(object, metaclass=ABCMeta): """ Copy data from buffer to dst """ - self.backend.memcpy(dst,self, **kargs) - + self.backend.memcpy(dst, self, **kargs) def transpose_to_state(self, state, **kargs): """ @@ -432,8 +432,6 @@ class Array(object, metaclass=ABCMeta): axes.append(origin.index(axe)) return self.transpose(axes=axes) - - # np.ndarray like methods def all(self, axis=None, out=None, **kargs): @@ -465,7 +463,7 @@ class Array(object, metaclass=ABCMeta): Returns the indices that would partition this array. """ return self.backend.argpartition(a=self, kth=kth, axis=axis, kind=kind, - order=order, **kargs) + order=order, **kargs) def argsort(self, axis=-1, kind='quicksort', order=None, **kargs): """ @@ -474,12 +472,12 @@ class Array(object, metaclass=ABCMeta): return self.backend.argsort(a=self, axis=axis, kind=kind, order=order, **kargs) def astype(self, dtype, order=MemoryOrdering.SAME_ORDER, casting='unsafe', subok=True, - copy=True, **kargs): + copy=True, **kargs): """ Copy of the array, cast to a specified type. """ return self.backend.astype(a=self, dtype=dtype, order=order, casting=casting, - subok=subok, copy=copy, **kargs) + subok=subok, copy=copy, **kargs) def byteswap(self, inplace=False, **kargs): """ @@ -506,7 +504,7 @@ class Array(object, metaclass=ABCMeta): Return selected slices of this array along given axis. """ return self.backend.compress(a=self, condition=condition, axis=axis, out=out, - **kargs) + **kargs) def conj(self, out=None, **kargs): """ @@ -548,7 +546,7 @@ class Array(object, metaclass=ABCMeta): """ Dot product of two arrays. """ - return self.backend.dot(a=self, b= b, out=out, **kargs) + return self.backend.dot(a=self, b=b, out=out, **kargs) def dump(self, file, **kargs): """ @@ -574,7 +572,6 @@ class Array(object, metaclass=ABCMeta): """ return self.backend.flatten(a=self, order=order, **kargs) - def max(self, axis=None, out=None, **kargs): """ Return the maximum along a given axis. @@ -605,7 +602,7 @@ class Array(object, metaclass=ABCMeta): in kth position is in the position it would be in a sorted array. """ return self.backend.partition(a=self, kth=kth, axis=axis, kind=kind, - order=order, **kargs) + order=order, **kargs) def prod(self, axis=None, dtype=None, out=None, **kargs): """ @@ -672,7 +669,7 @@ class Array(object, metaclass=ABCMeta): Returns the standard deviation of the array elements along given axis. """ return self.backend.std(a=self, axis=axis, dtype=dtype, out=out, - ddof=ddof) + ddof=ddof) def sum(self, axis=None, dtype=None, out=None, **kargs): """ @@ -691,7 +688,7 @@ class Array(object, metaclass=ABCMeta): Return the sum along diagonals of the array. """ return self.backend.trace(a=self, offset=offset, - axis1=axis1, axis2=axis2, dtype=dtype, out=out, **kargs) + axis1=axis1, axis2=axis2, dtype=dtype, out=out, **kargs) def transpose(self, axes=None, **kargs): """ @@ -705,18 +702,18 @@ class Array(object, metaclass=ABCMeta): """ return self.backend.var(a=self, axis=axis, dtype=dtype, out=out, ddof=ddof, **kargs) + # Array restricted methods - ## Array restricted methods def setflags(self, write=None, align=None, uic=None): """ Set array flags WRITEABLE, ALIGNED, and UPDATEIFCOPY, respectively. """ - msg='{}::set_flags() should not be called.' - msg=msg.format(self.__class__.__name__) + msg = '{}::set_flags() should not be called.' + msg = msg.format(self.__class__.__name__) raise RuntimeError(msg) + # Array specific unimplemented methods - ## Array specific unimplemented methods def tofile(self, fid, sep='', format='%s', **kargs): """ Write array to a file as text or binary (default). @@ -724,35 +721,39 @@ class Array(object, metaclass=ABCMeta): Information on endianness and precision is lost. """ self.__class__.not_implemented_yet('tofile') + def tolist(self, **kargs): """ Return the array as a possibly nested list. """ self.__class__not_implemented_yet('tolist') + def tostring(self, order=MemoryOrdering.SAME_ORDER, **kargs): """ Construct Python bytes containing the raw data bytes in the array. """ self.__class__not_implemented_yet('tostring') - def getfield(dtype, offset=0): + def getfield(self, dtype, offset=0): """ Returns a field of the given array as a certain type. """ self._not_implemented_yet('get_field') - def setfield(val, dtype, offset=0): + + def setfield(self, val, dtype, offset=0): """ Put a value into a specified place in a field defined by a data-type. Place val into a's field defined by dtype and beginning offset bytes into the field. """ self._not_implemented_yet('set_field') - def item(*args): + def item(self, *args): """ Copy an element of an array to a standard Python scalar and return it. """ self._not_implemented_yet('item') - def itemset(*args): + + def itemset(self, *args): """ Insert scalar into an array (scalar is cast to array's dtype, if possible) """ @@ -763,6 +764,7 @@ class Array(object, metaclass=ABCMeta): Set a.flatn = valuesn for all n in indices. """ self._not_implemented_yet('put') + def take(self, indices, axis=None, out=None, mode='raise', **kargs): """ Return an array formed from the elements of a at the given indices. @@ -774,12 +776,6 @@ class Array(object, metaclass=ABCMeta): New view of array with the same data. """ self._not_implemented_yet('view') - def astype(self, dtype, order=MemoryOrdering.SAME_ORDER, - casting='unsafe', subok=True, copy=True, **kargs): - """ - Copy of the array, cast to a specified type. - """ - self._not_implemented_yet('astype') def tobytes(self, order=MemoryOrdering.C_CONTIGUOUS): """ @@ -787,104 +783,137 @@ class Array(object, metaclass=ABCMeta): """ self._not_implemented_yet('tobytes') - # logical operators + def __eq__(self, other): return self.backend.equal(self, other) + def __ne__(self, other): return self.backend.not_equal(self, other) + def __le__(self, other): return self.backend.less_equal(self, other) + def __ge__(self, other): return self.backend.greater_equal(self, other) + def __lt__(self, other): return self.backend.less(self, other) + def __gt__(self, other): return self.backend.greater(self, other) # arithmetic operators def __neg__(self): return self.backend.negative(self) + def __abs__(self): return self.backend.absolute(self) + def __invert__(self): return self.backend.invert(self) def __add__(self, other): return self.backend.add(self, other) + def __sub__(self, other): return self.backend.subtract(self, other) + def __mul__(self, other): return self.backend.multiply(self, other) + def __pow__(self, other): return self.backend.power(self, other) + def __floordiv__(self, other): return self.backend.floor_divide(self, other) + def __truediv__(self, other): return self.backend.divide(self, other) + def __mod__(self, other): return self.backend.mod(self, other) - def __and__ (self, other): - return self.backend.bitwise_and(self,other) - def __xor__ (self, other): - return self.backend.bitwise_xor(self,other) - def __or__ (self, other): - return self.backend.bitwise_or(self,other) - def __lshift__ (self, other): - return self.backend.left_shift(self,other) - def __rshift__ (self, other): - return self.backend.right_shift(self,other) + def __and__(self, other): + return self.backend.bitwise_and(self, other) + + def __xor__(self, other): + return self.backend.bitwise_xor(self, other) + + def __or__(self, other): + return self.backend.bitwise_or(self, other) + + def __lshift__(self, other): + return self.backend.left_shift(self, other) + + def __rshift__(self, other): + return self.backend.right_shift(self, other) def __radd__(self, other): return self.backend.add(other, self) + def __rsub__(self, other): return self.backend.subtract(other, self) + def __rmul__(self, other): return self.backend.multiply(other, self) + def __rpow__(self, other): return self.backend.power(other, self) + def __rfloordiv__(self, other): return self.backend.floor_divide(other, self) + def __rtruediv__(self, other): return self.backend.divide(other, self) + def __rmod__(self, other): return self.backend.mod(other, self) - def __rand__ (other, self): + def __rand__(other, self): return self.backend.bitwise_and(other, self) - def __rxor__ (other, self): + + def __rxor__(other, self): return self.backend.bitwise_xor(other, self) - def __ror__ (other, self): + + def __ror__(other, self): return self.backend.bitwise_or(other, self) - def __rlshift__ (other, self): + + def __rlshift__(other, self): return self.backend.left_shift(other, self) - def __rrshift__ (other, self): + + def __rrshift__(other, self): return self.backend.right_shift(other, self) def __iadd__(self, other): return self.backend.add(self, other, out=self) + def __isub__(self, other): return self.backend.subtract(self, other, out=self) + def __imul__(self, other): return self.backend.multiply(self, other, out=self) + def __ipow__(self, other): return self.backend.power(self, other, a=self) + def __ifloordiv__(self, other): return self.backend.floor_divide(self, other, out=self) + def __idiv__(self, other): return self.backend.divide(self, other, out=self) + def __imod__(self, other): return self.backend.mod(self, other, out=self) - def __str__(self): return self._handle.__str__() + def __repr__(self): return self._handle.__repr__() def __setitem__(self, slices, value): - if any( (s==0) for s in self[slices].shape ): + if any((s == 0) for s in self[slices].shape): return self._call('__setitem__', slices, value) diff --git a/hysop/core/arrays/array_backend.py b/hysop/core/arrays/array_backend.py index a349765ef..b47b96127 100644 --- a/hysop/core/arrays/array_backend.py +++ b/hysop/core/arrays/array_backend.py @@ -6,12 +6,13 @@ from hysop.constants import default_order, MemoryOrdering, Backend from hysop.constants import HYSOP_REAL, HYSOP_COMPLEX from hysop.constants import HYSOP_INTEGER, HYSOP_INDEX, HYSOP_DIM, HYSOP_BOOL from hysop.tools.misc import prod -from hysop.tools.types import check_instance, to_tuple, to_list +from hysop.tools.types import check_instance, to_tuple, to_list from hysop.tools.handle import TaggedObject from hysop.tools.numerics import is_fp, is_complex, match_float_type, \ - match_complex_type, complex_to_float_dtype + match_complex_type, complex_to_float_dtype from hysop.core.memory.allocator import AllocatorBase + class ArrayBackend(TaggedObject, metaclass=ABCMeta): """ Interface of an abstract array backend. @@ -96,22 +97,22 @@ class ArrayBackend(TaggedObject, metaclass=ABCMeta): Alignment should non zero and a power of two, or None in which case it will be set to 1. """ - bytes_per_elem = dtype.itemsize - min_alignment = min_alignment or 1 + bytes_per_elem = dtype.itemsize + min_alignment = min_alignment or 1 min_alloc_bytes = prod(shape) * bytes_per_elem - msg0='min_alignment is not a power of two, got {}.' - msg1='bytes_per_elem is not a power of two, got {}.' - assert min_alloc_bytes>=1, 'min_alloc_bytes <= 0.' - assert min_alignment>=1, 'min_alignment <= 0.' - assert (min_alignment & (min_alignment-1))==0, msg0.format(min_alignment) - assert (bytes_per_elem & (bytes_per_elem-1))==0, msg1.format(bytes_per_elem) + msg0 = 'min_alignment is not a power of two, got {}.' + msg1 = 'bytes_per_elem is not a power of two, got {}.' + assert min_alloc_bytes >= 1, 'min_alloc_bytes <= 0.' + assert min_alignment >= 1, 'min_alignment <= 0.' + assert (min_alignment & (min_alignment-1)) == 0, msg0.format(min_alignment) + assert (bytes_per_elem & (bytes_per_elem-1)) == 0, msg1.format(bytes_per_elem) alignment = max(min_alignment, bytes_per_elem) - size = min_alloc_bytes + size = min_alloc_bytes nbytes = min_alloc_bytes + alignment - 1 - return (size,nbytes,alignment) + return (size, nbytes, alignment) @staticmethod def _register_backend(handle, backend_cls): @@ -124,31 +125,31 @@ class ArrayBackend(TaggedObject, metaclass=ABCMeta): @classmethod def _not_implemented_yet(cls, funname): msg = '{}::{}() has not been implemented yet.' - msg=msg.format(cls.__name__, funname) + msg = msg.format(cls.__name__, funname) raise NotImplementedError(msg) @classmethod def _unsupported_argument(cls, fname, argname, arg, default_value=None): if arg != default_value: msg = '{}::{}() has been implemented but argument \'{}\' is not ' - msg+= 'supported and should be set to {}.' - msg=msg.format(cls.__name__, fname, argname, default_value) + msg += 'supported and should be set to {}.' + msg = msg.format(cls.__name__, fname, argname, default_value) raise NotImplementedError(msg) @classmethod def _check_argtype(cls, fname, argname, arg, argself): if not isinstance(argself, tuple): - argself=(argself,) + argself = (argself,) if not arg.__class__ in argself: msg = '{}::{}(): argument type mismatch, expected a {}' - msg+= ' for argument \'{}\' but got a {}.' - msg=msg.format(cls.__name__, fname, argself, argname, arg.__class__) + msg += ' for argument \'{}\' but got a {}.' + msg = msg.format(cls.__name__, fname, argself, argname, arg.__class__) raise TypeError(msg) def __new__(cls, allocator, **kwds): return super(ArrayBackend, cls).__new__(cls, tag_prefix='bk', - tagged_cls=ArrayBackend, - **kwds) + tagged_cls=ArrayBackend, + **kwds) def __init__(self, allocator, **kwds): """ @@ -164,7 +165,7 @@ class ArrayBackend(TaggedObject, metaclass=ABCMeta): for kind in kinds: if (kind == Backend.HOST): return default_host_array_backend - msg='Could not find any known default backend kind.' + msg = 'Could not find any known default backend kind.' raise RuntimeError(msg) def any_backend_from_kind(self, *kinds): @@ -178,19 +179,21 @@ class ArrayBackend(TaggedObject, metaclass=ABCMeta): return NotImplemented eq = (self._allocator is other._allocator) return eq + def __ne__(self, other): if (not other.__class__ is self.__class__): return NotImplemented ne = (self._allocator is not other._allocator) return ne + def __hash__(self): return id(self._allocator) @abstractmethod def get_host_array_backend(self): - msg='get_host_array_backend() not implemented in {}.'.format(self.__class__) + msg = 'get_host_array_backend() not implemented in {}.'.format(self.__class__) raise NotImplementedError(msg) - host_array_backend=property(get_host_array_backend) + host_array_backend = property(get_host_array_backend) @abstractmethod def short_description(self): @@ -211,7 +214,6 @@ class ArrayBackend(TaggedObject, metaclass=ABCMeta): allocator = property(get_allocator) - def _prepare_args(self, *args, **kargs): """ Prepare all arguments for a call. @@ -219,12 +221,12 @@ class ArrayBackend(TaggedObject, metaclass=ABCMeta): if ArrayBackend.__DEBUG: print('__prepare_args') args = list(args) - for i,arg in enumerate(args): - args[i] = self._arg(arg) - for k,arg in kargs.items(): + for i, arg in enumerate(args): + args[i] = self._arg(arg) + for k, arg in kargs.items(): kargs[k] = self._arg(arg) if ('synchronize' in kargs): - msg='synchronize cannot be an argument to pyopencl.' + msg = 'synchronize cannot be an argument to pyopencl.' raise RuntimeError(msg) return tuple(args), kargs @@ -232,11 +234,11 @@ class ArrayBackend(TaggedObject, metaclass=ABCMeta): """ Wrap returned value(s) if they match a backend array. """ - if isinstance(ret,tuple): + if isinstance(ret, tuple): if ArrayBackend.__DEBUG: print('__return', [r.__class__.__name__ for r in ret]) values = list(ret) - for i,val in enumerate(values): + for i, val in enumerate(values): if self.can_wrap(val): values[i] = self.wrap(val, **kargs) elif self.host_array_backend.can_wrap(val): @@ -276,23 +278,23 @@ class ArrayBackend(TaggedObject, metaclass=ABCMeta): name = val.__name__ else: name = val - if hasattr(val,'__class__') and \ - val.__class__ in [type,int,float,np.dtype,list,tuple,set]: + if hasattr(val, '__class__') and \ + val.__class__ in [type, int, float, np.dtype, list, tuple, set]: name += ' = {}'.format(val) return name def format(val): if val is None: return 'Uninitialized' - if isinstance(val,tuple): + if isinstance(val, tuple): return ', '.join([get_name(v) for v in val]) elif isinstance(val, dict): - val = ['{} => {}'.format(k,get_name(val[k])) - for k in sorted(val.keys())] + val = ['{} => {}'.format(k, get_name(val[k])) + for k in sorted(val.keys())] return '\n\t'.join(val) - msg= \ - ''' + msg = \ + ''' Call of {} ({}) from class self={} failed Before argument processing: args: {} @@ -308,15 +310,14 @@ Call of {} ({}) from class self={} failed Exception was: {} '''.format(functor, functor.__name__, self.__class__.__name__, - format(varargs), format(kwargs), format(_ret), - format(args), format(kargs), format(ret), - e) + format(varargs), format(kwargs), format(_ret), + format(args), format(kargs), format(ret), + e) print(msg) raise return ret - def _alloc_outputs(self, fname, kargs): if ArrayBackend.__DEBUG: print('__begin allocs') @@ -325,26 +326,26 @@ Exception was: input_dtypes = {} output_arg_names = [] for argname, arg in kargs.items(): - if (argname.find('out')>=0): + if (argname.find('out') >= 0): if (arg is None): output_arg_names.append(argname) elif self.can_wrap(arg): arg = self.wrap(arg) - input_dtypes[argname]=arg.dtype + input_dtypes[argname] = arg.dtype shapes.append(arg.shape) orders.append(arg.order) if not output_arg_names: return - if not all(shape==shapes[0] for shape in shapes): - msg='Shape mismatch for array operands:\n {}'.format(shapes) + if not all(shape == shapes[0] for shape in shapes): + msg = 'Shape mismatch for array operands:\n {}'.format(shapes) raise RuntimeError(msg) else: shape = shapes[0] - if not all(order==orders[0] for order in orders): - order=MemoryOrdering.C_CONTIGUOUS + if not all(order == orders[0] for order in orders): + order = MemoryOrdering.C_CONTIGUOUS else: order = orders[0] @@ -354,14 +355,14 @@ Exception was: if (axis is None): shape = tuple() else: - axis = to_tuple(axis) + axis = to_tuple(axis) _shape = [] - for axe,s in enumerate(shape): + for axe, s in enumerate(shape): if axe not in axis: _shape.append(s) shape, _shape = to_tuple(_shape), shape else: - axis = None + axis = None _shape = shape if not shape: @@ -382,14 +383,14 @@ Exception was: ftypes_str = [] for ftype in ftypes: type_info = {} - fin,fout = ftype.split('->') + fin, fout = ftype.split('->') for typechar in fin+fout: try: type_info[typechar] = np.typename(typechar) except: type_info[typechar] = 'unknown type' - ss = '{}->{} ({})'.format(fin,fout, ', '.join('{}={}'.format(k,v) - for (k,v) in type_info.items())) + ss = '{}->{} ({})'.format(fin, fout, ', '.join('{}={}'.format(k, v) + for (k, v) in type_info.items())) ftypes_str.append(ss) print(' *ufunc available signatures:\n {}'.format('\n '.join(ftypes_str))) @@ -404,22 +405,22 @@ Exception was: output_dtypes = {} dtypes = tuple(input_dtypes.values()) - dtype = np.find_common_type([], dtypes) + dtype = np.find_common_type([], dtypes) - if fname.find('frexp')==0: + if fname.find('frexp') == 0: output_dtypes['out1'] = match_float_type(dtype) output_dtypes['out2'] = np.int32 else: # all outputs share the same dtype if fname in ['rint', 'floor', 'ceil', 'trunc', - 'exp', 'exp2', 'expm1', - 'log', 'log1p', 'log2', 'log10', - 'logaddexp', 'logaddexp2', 'ldexp', - 'sqrt', 'cbrt', 'hypot', - 'fabs', 'copysign', 'modf', - 'sin', 'cos', 'tan', 'arcsin', 'arccos', 'arctan', 'arctan2', - 'sinh', 'cosh', 'tanh', 'arcsinh', 'arccosh', 'arctanh', - 'rad2deg', 'deg2rad']: + 'exp', 'exp2', 'expm1', + 'log', 'log1p', 'log2', 'log10', + 'logaddexp', 'logaddexp2', 'ldexp', + 'sqrt', 'cbrt', 'hypot', + 'fabs', 'copysign', 'modf', + 'sin', 'cos', 'tan', 'arcsin', 'arccos', 'arctan', 'arctan2', + 'sinh', 'cosh', 'tanh', 'arcsinh', 'arccosh', 'arctanh', + 'rad2deg', 'deg2rad']: if is_complex(dtype): dtype = match_complex_type(dtype) else: @@ -428,7 +429,7 @@ Exception was: if is_complex(dtype): dtype = complex_to_float_dtype(dtype) elif fname in ['true_divide']: - if (dtypes[0]==dtypes[1]): + if (dtypes[0] == dtypes[1]): dtype0 = dtypes[0] if is_fp(dtype0) or is_complex(dtype0): dtype = dtype0 @@ -445,7 +446,6 @@ Exception was: ############################ # BACKEND SPECIFIC METHODS # - @abstractmethod def wrap(self, handle, **kargs): """ @@ -470,22 +470,21 @@ Exception was: if isinstance(arg, Array): return arg.handle elif isinstance(arg, MemoryOrdering): - if arg==MemoryOrdering.C_CONTIGUOUS: + if arg == MemoryOrdering.C_CONTIGUOUS: return 'C' - elif arg==MemoryOrdering.F_CONTIGUOUS: + elif arg == MemoryOrdering.F_CONTIGUOUS: return 'F' - elif arg==MemoryOrdering.SAME_ORDER: + elif arg == MemoryOrdering.SAME_ORDER: return 'K' - elif arg==MemoryOrdering.OUT_OF_ORDER: - msg='Unsupported memory ordering {}.'.format(arg) + elif arg == MemoryOrdering.OUT_OF_ORDER: + msg = 'Unsupported memory ordering {}.'.format(arg) raise RuntimeError(msg) else: - msg='Unknown memory ordering {}.'.format(arg) + msg = 'Unknown memory ordering {}.'.format(arg) raise RuntimeError(msg) else: return arg - def copyto(self, dst, src, **kargs): """ src is an Array @@ -496,7 +495,6 @@ Exception was: ############################## # EXTRA AND MODIFIED METHODS # - def fill(self, a, value): """ Fill the array with given value @@ -521,27 +519,26 @@ Exception was: elif backend_cls is HostArrayBackend: host_array_backend = dst.backend.host_array_backend src = host_array_backend.wrap(src) - host_array_backend.copyto(dst, src,**kargs) + host_array_backend.copyto(dst, src, **kargs) else: - msg='dst does not match registered backend for type {}.' - msg=msg.format(cls) + msg = 'dst does not match registered backend for type {}.' + msg = msg.format(cls) raise TypeError(msg) elif cls in [list, tuple, set] and isinstance(dst.backend, HostArrayBackend): src = dst.backend.asarray(src) dst.backend.copyto(dst, src, **kargs) else: print(src.__class__, dst.__class__) - msg='src cannot be converted to type Array.' + msg = 'src cannot be converted to type Array.' raise TypeError(msg) else: - msg='Neither src nor dst are of type Array.' + msg = 'Neither src nor dst are of type Array.' raise TypeError(msg) - ########################### # ARRAY CREATION ROUTINES # -## See https://docs.scipy.org/doc/numpy/reference/routines.array-creation.html +# See https://docs.scipy.org/doc/numpy/reference/routines.array-creation.html # Ones and zeros @@ -605,10 +602,10 @@ Exception was: """ self._not_implemented_yet('full_like') -#From existing data +# From existing data def array(self, object, dtype=None, copy=True, order=default_order, - subok=False, ndmin=0): + subok=False, ndmin=0): """ Create an array. """ @@ -662,20 +659,7 @@ Exception was: """ self._not_implemented_yet('fromiter') - def fromstring(self, string, dtype=HYSOP_REAL, count=-1, sep=''): - """ - A new 1-D array initialized from raw binary or text data in a string. - """ - self._not_implemented_yet('fromstring') - - def loadtxt(self, fname, dtype=HYSOP_REAL, comments='#', delimiter=None, - converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0): - """ - Load data from a text file. - """ - self._not_implemented_yet('loadtxt') - -#Numerical ranges +# Numerical ranges def arange(self, dtype=HYSOP_INTEGER, *args, **kargs): """ @@ -683,7 +667,6 @@ Exception was: """ self._not_implemented_yet('arange') - def linspace(self, start, stop, num=50, endpoint=True, retstep=False, dtype=HYSOP_REAL): """ Return evenly spaced numbers over a specified interval. @@ -708,7 +691,7 @@ Exception was: """ self._not_implemented_yet('meshgrid') -#Building matrices +# Building matrices def diag(self, v, k=0): """ @@ -749,9 +732,9 @@ Exception was: ############################### # ARRAY MANIPULATION ROUTINES # -## See https://docs.scipy.org/doc/numpy/reference/routines.array-manipulation.html +# See https://docs.scipy.org/doc/numpy/reference/routines.array-manipulation.html -#Changing array shape +# Changing array shape def reshape(self, a, newshape, order=default_order): """ @@ -765,8 +748,8 @@ Exception was: """ self._not_implemented_yet('ravel') -#Transpose-like operations -## /!\ those functions alter the logical transposition state /!\ +# Transpose-like operations +# /!\ those functions alter the logical transposition state /!\ def moveaxis(self, a, source, destination): """ @@ -774,7 +757,7 @@ Exception was: Axe 0 is the slowest varying index, last axe is the fastest varying index. """ axes = tuple(i for i in range(a.ndim)) - if source>destination: + if source > destination: axes = axes[:destination] + (source,) + axes[destination:source] + axes[source+1:] else: axes = axes[:source] + axes[source+1:destination] + (source,) + axes[destination:] @@ -808,7 +791,7 @@ Exception was: self._not_implemented_yet('transpose') -#Changing number of dimensions +# Changing number of dimensions def atleast_1d(self, *arys): """ @@ -852,7 +835,7 @@ Exception was: """ self._not_implemented_yet('squeeze') -#Changing kind of array +# Changing kind of array def asfortranarray(self, a, dtype=None): """ Return an array laid out in Fortran order in memory. @@ -883,7 +866,7 @@ Exception was: """ self._not_implemented_yet('require') -#Joining arrays +# Joining arrays def concatenate(self, a, axis=0): """ @@ -921,7 +904,7 @@ Exception was: """ self._not_implemented_yet('vstack') -#Splitting arrays +# Splitting arrays def split(self, ary, indices_or_sections, axis=0): """ @@ -953,7 +936,7 @@ Exception was: """ self._not_implemented_yet('vsplit') -#Tiling arrays +# Tiling arrays def tile(self, A, reps): """ @@ -967,7 +950,7 @@ Exception was: """ self._not_implemented_yet('repeat') -#Adding and removing elements +# Adding and removing elements def delete(self, arr, obj, axis=None): """ @@ -999,13 +982,7 @@ Exception was: """ self._not_implemented_yet('trim_zeros') - def unique(self, ar, return_index=False, return_inverse=False, return_counts=False): - """ - Find the unique elements of an array. - """ - self._not_implemented_yet('unique') - -#Rearranging elements +# Rearranging elements def flip(self, m, axis): """ @@ -1031,7 +1008,7 @@ Exception was: """ self._not_implemented_yet('roll') - def rot90(self, m, k=1, axes=(0,1)): + def rot90(self, m, k=1, axes=(0, 1)): """ Rotate an array by 90 degrees in the plane specified by axes. """ @@ -1040,7 +1017,7 @@ Exception was: ##################### # BINARY OPERATIONS # -## See https://docs.scipy.org/doc/numpy/reference/routines.bitwise.html +# See https://docs.scipy.org/doc/numpy/reference/routines.bitwise.html # Elementwise bit operations @@ -1062,7 +1039,6 @@ Exception was: """ self._not_implemented_yet('bitwise_xor') - def invert(self, x, out=None): """ Compute bit-wise inversion, or bit-wise NOT, element-wise. @@ -1081,7 +1057,7 @@ Exception was: """ self._not_implemented_yet('right_shift') -#Bit packing +# Bit packing def packbits(self, myarray, axis=None): """ @@ -1095,20 +1071,11 @@ Exception was: """ self._not_implemented_yet('unpackbits') -#Output formatting - - def binary_repr(self, num, width=None): - """ - Return the binary representation of the input number as a string. - """ - self._not_implemented_yet('binary_repr') - - ############################## # DISCRETE FOURIER TRANSFORM # -## See https://docs.scipy.org/doc/numpy/reference/routines.fft.html +# See https://docs.scipy.org/doc/numpy/reference/routines.fft.html -#Standard FFTs +# Standard FFTs def fft(self, a, n=None, axis=-1, norm=None): """ @@ -1146,7 +1113,7 @@ Exception was: """ self._not_implemented_yet('ifftn') -#Real FFTs +# Real FFTs def rfft(self, a, n=None, axis=-1, norm=None): """ @@ -1160,13 +1127,13 @@ Exception was: """ self._not_implemented_yet('irfft') - def rfft2(self, a, s=None, axes=(-2,-1), norm=None): + def rfft2(self, a, s=None, axes=(-2, -1), norm=None): """ Compute the 2-dimensional FFT of a real array. """ self._not_implemented_yet('rfft2') - def irfft2(self, a, s=None, axes=(-2,-1), norm=None): + def irfft2(self, a, s=None, axes=(-2, -1), norm=None): """ Compute the 2-dimensional inverse FFT of a real array. """ @@ -1184,7 +1151,7 @@ Exception was: """ self._not_implemented_yet('irfftn') -#Hermitian FFTs +# Hermitian FFTs def hfft(self, a, n=None, axis=-1, norm=None): """ @@ -1198,7 +1165,7 @@ Exception was: """ self._not_implemented_yet('ihfft') -#Helper routines +# Helper routines def fftfreq(self, n=None, d=1.0): """ @@ -1227,7 +1194,7 @@ Exception was: ########################## # FUNCTIONAL PROGRAMMING # -## See https://docs.scipy.org/doc/numpy/reference/routines.functional.html +# See https://docs.scipy.org/doc/numpy/reference/routines.functional.html def apply_along_axis(self, func1d, axis, arr, *args, **kwargs): @@ -1263,7 +1230,7 @@ Exception was: #################### # INPUT AND OUTPUT # -## See https://docs.scipy.org/doc/numpy/reference/routines.io.html +# See https://docs.scipy.org/doc/numpy/reference/routines.io.html # NumPy binary files (NPY, NPZ) @@ -1294,24 +1261,24 @@ Exception was: # Text files def loadtxt(self, dtype=HYSOP_REAL, comments='#', delimiter=None, - converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0): + converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0): """ Load data from a text file. """ self._not_implemented_yet('loadtxt') def savetxt(self, fname, X, fmt='%.18e', delimiter=' ', newline='\n', - header='', footer='', comments='# '): + header='', footer='', comments='# '): """ Save an array to a text file. """ self._not_implemented_yet('savetxt') def genfromtxt(self, fname, dtype=HYSOP_REAL, comments='#', delimiter=None, - skip_header=0, skip_footer=0, converters=None, missing_values=None, - filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, - replace_space='_', autostrip=False, case_sensitive=True, defaultfmt='f%i', - unpack=None, usemask=False, loose=True, invalid_raise=True, max_rows=None): + skip_header=0, skip_footer=0, converters=None, missing_values=None, + filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, + replace_space='_', autostrip=False, case_sensitive=True, defaultfmt='f%i', + unpack=None, usemask=False, loose=True, invalid_raise=True, max_rows=None): """ Load data from a text file, with missing values handled as specified. """ @@ -1332,7 +1299,7 @@ Exception was: # String formatting def array2string(self, a, max_line_width=None, precision=None, suppress_small=None, - separator=' ', prefix='', style=repr, formatter=None): + separator=' ', prefix='', style=repr, formatter=None): """ Return a string representation of an array. """ @@ -1349,11 +1316,11 @@ Exception was: Return a string representation of the data in an array. """ self._not_implemented_yet('array_str') -#Text formatting options +# Text formatting options def set_printoptions(self, precision=None, threshold=None, edgeitems=None, - linewidth=None, suppress=None, nanstr=None, - infstr=None, formatter=None): + linewidth=None, suppress=None, nanstr=None, + infstr=None, formatter=None): """ Set printing options. """ @@ -1370,7 +1337,7 @@ Exception was: Set a Python function to be used when pretty printing arrays. """ self._not_implemented_yet('set_string_function') -#Base-n representations +# Base-n representations def binary_repr(self, num, width=None): """ @@ -1387,9 +1354,9 @@ Exception was: ###################### ### LINEAR ALGEBRA ### -## See https://docs.scipy.org/doc/numpy/reference/routines.linalg.html +# See https://docs.scipy.org/doc/numpy/reference/routines.linalg.html -#Matrix and vector products +# Matrix and vector products def dot(self, a, b, out=None): """ @@ -1428,7 +1395,7 @@ Exception was: self._not_implemented_yet('tensordot') def einsum(self, subscripts, out=None, dtype=None, order=MemoryOrdering.SAME_ORDER, - casting='safe', optimize=False, *operands): + casting='safe', optimize=False, *operands): """ Evaluates the Einstein summation convention on the operands. """ @@ -1446,7 +1413,7 @@ Exception was: """ self._not_implemented_yet('kron') -#Decompositions +# Decompositions def cholesky(self, a): """ @@ -1466,7 +1433,7 @@ Exception was: """ self._not_implemented_yet('svd') -#Matrix eigenvalues +# Matrix eigenvalues def eig(self, a): """ @@ -1492,7 +1459,7 @@ Exception was: """ self._not_implemented_yet('eigvalsh') -#Norms and other numbers +# Norms and other numbers def norm(self, x, ord=None, axis=None, keepdims=False): """ @@ -1530,7 +1497,7 @@ Exception was: """ self._not_implemented_yet('trace') -#Solving equations and inverting matrices +# Solving equations and inverting matrices def solve(self, a, b): """ @@ -1571,9 +1538,9 @@ Exception was: ################### # LOGIC FUNCTIONS # -## See https://docs.scipy.org/doc/numpy/reference/routines.logic.html +# See https://docs.scipy.org/doc/numpy/reference/routines.logic.html -#Truth value testing +# Truth value testing def any(self, a, axis=None, out=None): """ @@ -1587,7 +1554,7 @@ Exception was: """ self._not_implemented_yet('all') -#Array contents +# Array contents def isfinite(self, x, out=None): """ @@ -1619,7 +1586,7 @@ Exception was: """ self._not_implemented_yet('isposinf') -#Logical operations +# Logical operations def logical_and(self, x1, x2, out=None): """ @@ -1645,7 +1612,7 @@ Exception was: """ self._not_implemented_yet('logical_xor') -#Comparisson +# Comparisson def allclose(self, a, b, rtol=1e-05, atol=1e-08, equal_nan=False): """ @@ -1710,7 +1677,7 @@ Exception was: ########################## # MATHEMATICAL FUNCTIONS # -## See https://docs.scipy.org/doc/numpy/reference/routines.math.html +# See https://docs.scipy.org/doc/numpy/reference/routines.math.html # Trigonometric functions @@ -1732,7 +1699,6 @@ Exception was: """ self._not_implemented_yet('tan') - def arcsin(self, x, out=None): """ Inverse sine, element-wise. @@ -1757,7 +1723,6 @@ Exception was: """ self._not_implemented_yet('arctan2') - def hypot(self, x1, x2, out=None): """ Given the legs of a right triangle, return its hypotenuse. @@ -1802,7 +1767,6 @@ Exception was: """ self._not_implemented_yet('tanh') - def arcsinh(self, x, out=None): """ Inverse hyperbolic sine element-wise. @@ -1829,14 +1793,12 @@ Exception was: """ self._not_implemented_yet('around') - def fix(self, x, y=None): """ Round to nearest integer towards zero. """ self._not_implemented_yet('fix') - def rint(self, x, out=None): """ Round elements of the array to the nearest integer. @@ -1965,7 +1927,6 @@ Exception was: """ self._not_implemented_yet('expm1') - def log(self, x, out=None): """ Natural logarithm, element-wise. @@ -1990,7 +1951,6 @@ Exception was: """ self._not_implemented_yet('log1p') - def logaddexp(self, x1, x2, out=None): """ Logarithm of the sum of exponentiations of the inputs. @@ -2199,7 +2159,6 @@ Exception was: """ self._not_implemented_yet('interp') - def maximum(self, x1, x2, out=None): """ Element-wise maximum of array elements. @@ -2230,14 +2189,12 @@ Exception was: """ self._not_implemented_yet('fabs') - def absolute(self, x, out=None): """ Calculate the absolute value element-wise. """ self._not_implemented_yet('absolute') - def sign(self, x, out=None): """ Returns an element-wise indication of the sign of a number. @@ -2247,7 +2204,7 @@ Exception was: ################### # RANDOM SAMPLING # -## See https://docs.scipy.org/doc/numpy/reference/routines.random.html +# See https://docs.scipy.org/doc/numpy/reference/routines.random.html # Simple random data @@ -2459,12 +2416,6 @@ Exception was: """ self._not_implemented_yet('poisson') - def power(self, a, size=None): - """ - Draws samples in 0, 1 from a power distribution with positive exponent a - 1. - """ - self._not_implemented_yet('power') - def rayleigh(self, scale=1.0, size=None): """ Draw samples from a Rayleigh distribution. @@ -2560,7 +2511,7 @@ Exception was: ################ # SET ROUTINES # -## See https://docs.scipy.org/doc/numpy/reference/routines.set.html +# See https://docs.scipy.org/doc/numpy/reference/routines.set.html # Making proper sets @@ -2605,7 +2556,7 @@ Exception was: ################################### # SORTING, SEARCHING AND COUNTING # -## See https://docs.scipy.org/doc/numpy/reference/routines.sort.html +# See https://docs.scipy.org/doc/numpy/reference/routines.sort.html # Sorting @@ -2651,7 +2602,7 @@ Exception was: """ self._not_implemented_yet('argpartition') -#Searching +# Searching def argmax(self, a, axis, out=None): """ @@ -2713,7 +2664,7 @@ Exception was: """ self._not_implemented_yet('extract') -#Counting +# Counting def count_nonzero(self, a, axis=None): """ @@ -2723,9 +2674,9 @@ Exception was: ############## # STATISTICS # -## See https://docs.scipy.org/doc/numpy/reference/routines.sort.html +# See https://docs.scipy.org/doc/numpy/reference/routines.sort.html -#Order statistics +# Order statistics def amin(self, a, axis=None, out=None): """ @@ -2758,14 +2709,14 @@ Exception was: self._not_implemented_yet('ptp') def percentile(self, a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear'): + interpolation='linear'): """ Compute the qth percentile of the data along the specified axis. """ self._not_implemented_yet('percentile') def nanpercentile(self, a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear'): + interpolation='linear'): """ Compute the qth percentile of the data along the specified axis, while ignoring nan values. @@ -2885,16 +2836,15 @@ Exception was: """ Convert angles from radians to degrees. """ - return self.rad2deg(x=x,out=out, **kargs) + return self.rad2deg(x=x, out=out, **kargs) def radians(self, x, out=None, **kargs): """ Convert angles from degrees to radians. """ - return self.deg2rad(x=x,out=out, **kargs) + return self.deg2rad(x=x, out=out, **kargs) - - def remainder(self, x1, x2, out=None,**kargs): + def remainder(self, x1, x2, out=None, **kargs): """ Return element-wise remainder of division (MOD). Remainder has the same sign as the divisor x2. @@ -2911,8 +2861,8 @@ def __generate_hysop_type_functions(): functions = { - 'as{type}array': -''' + 'as{type}array': + ''' def hysop_array_generated_method(self, a, order=default_order, **kargs): """ Convert the input to an array of dtype HYSOP_{TYPE}. @@ -2921,7 +2871,7 @@ def hysop_array_generated_method(self, a, order=default_order, **kargs): return self.asarray(a=a, dtype=dtype, order=order, **kargs) ''', '{type}_prod': -''' + ''' def hysop_array_generated_method(self, a, axis=None, out=None, **kargs): """ Sum of array elements over a given axis. @@ -2930,7 +2880,7 @@ def hysop_array_generated_method(self, a, axis=None, out=None, **kargs): return self.prod(a=a,axis=axis,out=out,dtype=dtype,**kargs) ''', '{type}_sum': -''' + ''' def hysop_array_generated_method(self, a, axis=None, out=None, **kargs): """ Sum of array elements over a given axis. @@ -2940,7 +2890,7 @@ def hysop_array_generated_method(self, a, axis=None, out=None, **kargs): ''', '{type}_empty': -''' + ''' def hysop_array_generated_method(self, shape, order=default_order, **kargs): """ Return a new array of given shape and type, without initializing entries. @@ -2950,7 +2900,7 @@ def hysop_array_generated_method(self, shape, order=default_order, **kargs): ''', '{type}_ones': -''' + ''' def hysop_array_generated_method(self, shape, order=default_order, **kargs): """ Return a new array of given shape filled with ones of type HYSOP_{TYPE}. @@ -2960,7 +2910,7 @@ def hysop_array_generated_method(self, shape, order=default_order, **kargs): ''', '{type}_zeros': -''' + ''' def hysop_array_generated_method(self, shape, order=default_order, **kargs): """ Return a new array of given shape, filled with zeros of type HYSOP_{TYPE}. @@ -2970,7 +2920,7 @@ def hysop_array_generated_method(self, shape, order=default_order, **kargs): ''', '{type}_full': -''' + ''' def hysop_array_generated_method(self, shape, fill_value, order=default_order, **kargs): """ Return a new array of given shape, filled with fill_value of type HYSOP_{TYPE}. @@ -2978,7 +2928,7 @@ def hysop_array_generated_method(self, shape, fill_value, order=default_order, * dtype = HYSOP_{TYPE} return self.full(shape=shape, fill_value=filling_value, order=order, dtype=dtype, **kargs) ''' -} + } hysop_types = ['real', 'complex', 'integer', 'index', 'dim', 'bool'] @@ -2986,7 +2936,7 @@ def hysop_array_generated_method(self, shape, fill_value, order=default_order, * for fname, fdefinition in functions.items(): fname = fname.format(type=ht, TYPE=ht.upper()) fdef = \ -''' + ''' from hysop.constants import default_order, MemoryOrdering, Backend from hysop.constants import HYSOP_REAL, HYSOP_COMPLEX, HYSOP_ORDER from hysop.constants import HYSOP_INTEGER, HYSOP_INDEX, HYSOP_DIM, HYSOP_BOOL @@ -2996,5 +2946,5 @@ from hysop.constants import HYSOP_INTEGER, HYSOP_INDEX, HYSOP_DIM, HYSOP_BOOL exec(fdef, namespace) setattr(ArrayBackend, fname, namespace['hysop_array_generated_method']) -__generate_hysop_type_functions() +__generate_hysop_type_functions() diff --git a/hysop/core/checkpoints.py b/hysop/core/checkpoints.py index 365a759f4..d69f079a0 100644 --- a/hysop/core/checkpoints.py +++ b/hysop/core/checkpoints.py @@ -127,8 +127,8 @@ class CheckpointHandler(object): # ok, use another directory name to avoid dataloss... load_checkpoint_dir = os.path.join(os.path.dirname(load_checkpoint_path), '{}'.format(uuid.uuid4().hex)) - tf = tarfile.open(load_checkpoint_path, mode='r') - tf.extractall(path=load_checkpoint_dir) + with tarfile.open(load_checkpoint_path, mode='r') as tf: + tf.extractall(path=load_checkpoint_dir) else: load_checkpoint_dir = None load_checkpoint_dir = comm.bcast(load_checkpoint_dir, root=io_leader) @@ -477,7 +477,7 @@ class CheckpointHandler(object): # checkpoint template may have been deleted by user during simulation if (self._checkpoint_template is None) or (not os.path.isdir(self._checkpoint_template)): - self.create_checkpoint_template(problem) + self.create_checkpoint_template(problem, simulation) checkpoint_template = self._checkpoint_template checkpoint_compressor = self._checkpoint_compressor diff --git a/hysop/core/graph/allocator.py b/hysop/core/graph/allocator.py index 154b38e73..9fb697ab8 100644 --- a/hysop/core/graph/allocator.py +++ b/hysop/core/graph/allocator.py @@ -1,18 +1,19 @@ - from abc import ABCMeta, abstractmethod import numpy as np +from hysop.tools.decorators import not_implemented, debug from hysop.tools.types import check_instance from hysop.core.arrays.all import ArrayBackend, HostArrayBackend, OpenClArrayBackend -class MemoryRequestsAllocator(object, metaclass=ABCMeta): +class MemoryAllocator(object, metaclass=ABCMeta): @classmethod @not_implemented def handle_requests(cls, requests): pass + class StandardArrayAllocator(MemoryAllocator): def __init__(self, array_backend): @@ -26,7 +27,7 @@ class StandardArrayAllocator(MemoryAllocator): return npb.empty(shape=(nbytes,), dtype=dtype) def handle_requests(self, requests): - from hysop.core.graph.mem_request import MultipleOperatorMemoryRequests + from hysop.core.memory.memory_request import MultipleOperatorMemoryRequests if not isinstance(requests, MultipleOperatorMemoryRequests): msg = 'requests is not an instance of MultipleOperatorMemoryRequests (got a {}).' raise ValueError(msg.format(requests.__class__)) @@ -39,40 +40,43 @@ class StandardArrayAllocator(MemoryAllocator): array = self.allocate(total_bytes) op_requests = requests._all_requests_per_allocator[self] - views = requests._allocated_buffers + views = requests._allocated_buffers self.build_array_views(array, op_requests, views) def build_array_views(self, array, op_requests, views): - from hysop.core.graph.mem_request import OperatorMemoryRequests + from hysop.core.memory.memory_request import OperatorMemoryRequests check_instance(op_requests, dict) ptr = array.ctypes.data - for (op,requests) in op_requests.items(): + for (op, requests) in op_requests.items(): check_instance(requests, dict, values=OperatorMemoryRequests) start_idx = 0 for (req_id, req) in requests.items(): align_offset = (-ptr % req.alignment) start_idx += align_offset - end_idx = start_idx + req.data_bytes() + end_idx = start_idx + req.data_bytes() - view = data[start_idx:end_idx].view(req.dtype).reshape(req.shape) + view = array[start_idx:end_idx].view(req.dtype).reshape(req.shape) if op not in views: views[op] = {} views[op][req_id] = view start_idx = end_idx ptr += align_offset + req.data_bytes() - assert end_idx <= total_bytes + assert end_idx <= requests.min_bytes_to_allocate(self) + + +NumpyMemoryRequestAllocator = StandardArrayAllocator(array_backend=HostArrayBackend) -NumpyMemoryRequestAllocator = StandardArrayAllocator(array_backend=HostArray) def OpenClMemoryRequestAllocator(cl_env): return StandardArrayAllocator(array_backend=cl_env.array_backend()) + if __name__ == '__main__': m0 = NumpyMemRequest(count=1, dtype=np.int32) m1 = NumpyMemRequest(shape=(2,), dtype=np.int32) - m2 = NumpyMemRequest(shape=(2,2), dtype=np.int32) - m3 = NumpyMemRequest(shape=(2,2,2,), dtype=np.int32, alignment=64) + m2 = NumpyMemRequest(shape=(2, 2), dtype=np.int32) + m3 = NumpyMemRequest(shape=(2, 2, 2,), dtype=np.int32, alignment=64) m4 = NumpyMemRequest(count=3, dtype=np.int32) m5 = NumpyMemRequest(shape=(4,), dtype=np.int32) @@ -114,4 +118,3 @@ if __name__ == '__main__': print(all_reqs.buffers[NumpyMemoryAllocator][0].view(dtype=np.int32)) assert m3.data.ctypes.data % 64 == 0 - diff --git a/hysop/core/graph/computational_graph.py b/hysop/core/graph/computational_graph.py index cc4c5f078..dc091fe77 100644 --- a/hysop/core/graph/computational_graph.py +++ b/hysop/core/graph/computational_graph.py @@ -31,6 +31,14 @@ class ComputationalGraph(ComputationalGraphNode, metaclass=ABCMeta): def __new__(cls, candidate_input_tensors=None, candidate_output_tensors=None, **kwds): + if ('input_fields' in kwds.keys()) or ('output_fields' in kwds.keys()): + msg = 'input_fields or output_fields parameters should not be used in {}, they are \ + deduced during graph construction (building step).'.format(cls) + raise ValueError(msg) + if ('input_params' in kwds.keys()) or ('output_params' in kwds.keys()): + msg = 'input_params or output_params parameters should not be used in {}, they are \ + deduced during graph construction (building step).'.format(cls) + raise ValueError(msg) return super(ComputationalGraph, cls).__new__(cls, input_fields=None, output_fields=None, **kwds) @@ -53,16 +61,6 @@ class ComputationalGraph(ComputationalGraphNode, metaclass=ABCMeta): Graph building is done at the end of the initialization step, after all internal nodes have been initialized. """ - - if ('input_fields' in kwds.keys()) or ('output_fields' in kwds.keys()): - msg = 'input_fields or output_fields parameters should not be used in {}, they are \ - deduced during graph construction (building step).'.format(cls) - raise ValueError(msg) - if ('input_params' in kwds.keys()) or ('output_params' in kwds.keys()): - msg = 'input_params or output_params parameters should not be used in {}, they are \ - deduced during graph construction (building step).'.format(cls) - raise ValueError(msg) - super(ComputationalGraph, self).__init__(input_fields=None, output_fields=None, **kwds) diff --git a/hysop/core/tests/test_checkpoint.sh b/hysop/core/tests/test_checkpoint.sh index 17c86157d..ccd229780 100755 --- a/hysop/core/tests/test_checkpoint.sh +++ b/hysop/core/tests/test_checkpoint.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash set -feu -o pipefail -PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE:-python3.8} +PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE:-python3} MPIRUN_EXECUTABLE=${MPIRUN_EXECUTABLE:-mpirun} SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" @@ -15,7 +15,7 @@ function compare_files { echo "File '${2}' does not exist." exit 1 fi - + # see https://stackoverflow.com/questions/3679296/only-get-hash-value-using-md5sum-without-filename # for the bash array assignment trick (solution proposed by Peter O.) # we also remove signs in front of zeros @@ -145,8 +145,8 @@ echo ' Comparing solutions...' for f0 in $(find "${TEST_DIR}/run0" -name '*.h5' | sort -n); do f1=$(echo "${f0}" | sed 's/run0/run1/') f2=$(echo "${f0}" | sed 's/run0/run2/') - h5diff -d '1e-12' "${f0}" "${f1}" - h5diff -d '1e-12' "${f0}" "${f2}" + h5diff -d '1e-12' "${f0}" "${f1}" + h5diff -d '1e-12' "${f0}" "${f2}" echo " >$(basename ${f0}) match" done @@ -160,9 +160,9 @@ for f3 in $(find "${TEST_DIR}/run3" -name '*.h5' | sort -n); do f0=$(echo "${f3}" | sed 's/run3/run0/') f4=$(echo "${f3}" | sed 's/run3/run4/') f5=$(echo "${f3}" | sed 's/run3/run5/') - h5diff -d '1e-12' "${f0}" "${f3}" - h5diff -d '1e-12' "${f0}" "${f4}" - h5diff -d '1e-12' "${f0}" "${f5}" + h5diff -d '1e-12' "${f0}" "${f3}" + h5diff -d '1e-12' "${f0}" "${f4}" + h5diff -d '1e-12' "${f0}" "${f5}" echo " >$(basename ${f0}) match" done @@ -172,7 +172,6 @@ ${MPIRUN_EXECUTABLE} -np 1 ${PYTHON_EXECUTABLE} "${EXAMPLE_FILE}" ${COMMON_OPTIO echo ' Comparing solutions...' for f6 in $(find "${TEST_DIR}/run6" -name '*.h5' | sort -n); do f7=$(echo "${f6}" | sed 's/run0/run7/') - h5diff -d '5e-5' "${f6}" "${f7}" + h5diff -d '5e-5' "${f6}" "${f7}" echo " >$(basename ${f6}) match" done - diff --git a/hysop/fields/continuous_field.py b/hysop/fields/continuous_field.py index 399f3354e..b87a2910b 100644 --- a/hysop/fields/continuous_field.py +++ b/hysop/fields/continuous_field.py @@ -1068,7 +1068,7 @@ class TensorField(NamedTensorContainerI, FieldContainerI): field0 = first_not_None(*fields) if (field0 is None): msg = 'Tensor field {} should at least contain a valid ScalarField.' - msg = msg.format(name) + msg = msg.format(field0.name) raise ValueError(msg) @classmethod @@ -1116,7 +1116,7 @@ class TensorField(NamedTensorContainerI, FieldContainerI): msg = 'Name {} was already used by another field.' msg = msg.format(name) raise ValueError(msg) - if (pname in pnames) and (pretty_name[name] is not field): + if (pname in pnames) and (pnames[pname] is not field): msg = 'Name {} was already used by another field.' msg = msg.format(pname) raise ValueError(msg) diff --git a/hysop/fields/discrete_field.py b/hysop/fields/discrete_field.py index d3babc3a0..4b9ae6818 100644 --- a/hysop/fields/discrete_field.py +++ b/hysop/fields/discrete_field.py @@ -1,3 +1,5 @@ +# coding: utf-8 + """ Discrete fields (scalars or vectors) descriptions. * :class:`~hysop.fields.discrete_field.DiscreteScalarFieldViewContainerI` @@ -20,10 +22,11 @@ from hysop.constants import GhostOperation, MemoryOrdering from hysop.core.arrays.all import ArrayBackend from hysop.topology.topology import Topology, TopologyView, TopologyState from hysop.fields.continuous_field import Field, VectorField, TensorField, \ - NamedScalarContainerI, NamedTensorContainerI + NamedScalarContainerI, NamedTensorContainerI from hysop.domain.domain import DomainView from hysop.mesh.mesh import MeshView + class DiscreteScalarFieldViewContainerI(object, metaclass=ABCMeta): """ Common abstract interface for scalar and tensor-like container of @@ -79,33 +82,35 @@ class DiscreteScalarFieldViewContainerI(object, metaclass=ABCMeta): def ids_to_components(self, ids): """Convert tensor coordinates into 1d offsets.""" - check_instance(ids, tuple, values=(int,tuple), allow_none=True) + check_instance(ids, tuple, values=(int, tuple), allow_none=True) return tuple(self.id_to_component(_) for _ in ids) def id_to_component(self, val): - check_instance(val, (int,tuple)) + check_instance(val, (int, tuple)) if isinstance(val, int): return val - elif len(val)==1: + elif len(val) == 1: return val[0] else: - strides = np.empty(shape=self.shape, dtype=np.int8).strides - assert len(val)==len(stride) - return sum(i*stride for (i,stride) in zip(val, stride)) - + stride = np.empty(shape=self.shape, dtype=np.int8).strides + assert len(val) == len(stride) + return sum(i*stride for (i, stride) in zip(val, stride)) @abstractmethod def initialize(self, **kwds): """Initialize all contained discrete fields.""" pass + @abstractmethod def fill(self, **kwds): """Fill all contained discrete field with an initial value.""" pass + @abstractmethod def randomize(self, **kwds): """Fill all contained discrete field with random values.""" pass + @abstractmethod def copy(self, from_dfield, **kwds): """Fill this discrete field with values from another one.""" @@ -282,7 +287,7 @@ class DiscreteScalarFieldViewContainerI(object, metaclass=ABCMeta): if type(a) != type(b): return False if isinstance(a, (list, tuple, set, frozenset)): - for (ai,bi) in zip(a, b): + for (ai, bi) in zip(a, b): if not are_equal(ai, bi): return False return True @@ -295,32 +300,39 @@ class DiscreteScalarFieldViewContainerI(object, metaclass=ABCMeta): return False return True if isinstance(a, np.ndarray): - return np.array_equal(a,b) - return (a==b) + return np.array_equal(a, b) + return (a == b) objects = self.get_attributes(*attr) obj0 = objects[0] for obj in objects[1:]: if not are_equal(obj0, obj): return False return True + def has_unique_backend(self): """Return true if all contained discrete fields share the same backend.""" return self.has_unique_attribute('backend') + def has_unique_backend_kind(self): """Return true if all contained discrete fields share the same backend kind.""" return self.has_unique_attribute('backend_kind') + def has_unique_domain(self): """Return true if all contained discrete fields share the same domain view.""" return self.has_unique_attribute('domain') + def has_unique_topology(self): """Return true if all contained discrete fields share the same topology view.""" return self.has_unique_attribute('topology') + def has_unique_topology_state(self): """Return true if all contained discrete fields share the same topology state.""" return self.has_unique_attribute('topology_state') + def has_unique_mesh(self): """Return true if all contained discrete fields share the same mesh view.""" return self.has_unique_attribute('mesh') + def has_unique_dtype(self): """Return true if all contained discrete fields share the same data type.""" return self.has_unique_attribute('dtype') @@ -334,9 +346,9 @@ class DiscreteScalarFieldViewContainerI(object, metaclass=ABCMeta): /!\ Can be slow to evaluate due to uniqueness check /!\ """ if self.has_unique_attribute(*attr): - return self.discrete_field_views()[0].get_attributes(*attr)[0] - msg='{} is not unique accross contained discrete fields.' - msg=msg.format('.'.join(attr)) + return self.discrete_field_views()[0].get_attributes(*attr)[0] + msg = '{} is not unique accross contained discrete fields.' + msg = msg.format('.'.join(attr)) raise AttributeError(msg) @property @@ -409,8 +421,10 @@ class DiscreteScalarFieldViewContainerI(object, metaclass=ABCMeta): def __eq__(self, other): return self.match(other) + def __ne__(self, other): return self.match(other, invert=True) + def __str__(self): return self.long_description() @@ -429,14 +443,14 @@ class DiscreteScalarFieldView(DiscreteScalarFieldViewContainerI, TaggedObjectVie @debug def __init__(self, dfield, topology_state, **kwds): super(DiscreteScalarFieldView, self).__init__(obj_view=dfield, - variable_kind=Variable.DISCRETE_FIELD, **kwds) + variable_kind=Variable.DISCRETE_FIELD, **kwds) @debug def __new__(cls, dfield, topology_state, **kwds): check_instance(dfield, DiscreteScalarField, allow_none=issubclass(cls, DiscreteScalarField)) check_instance(topology_state, TopologyState) obj = super(DiscreteScalarFieldView, cls).__new__(cls, obj_view=dfield, - variable_kind=Variable.DISCRETE_FIELD, **kwds) + variable_kind=Variable.DISCRETE_FIELD, **kwds) dfield = first_not_None(dfield, obj) obj._dfield = dfield obj._topology_state = topology_state @@ -468,21 +482,26 @@ class DiscreteScalarFieldView(DiscreteScalarFieldViewContainerI, TaggedObjectVie def ndim(self): """Number of dimensions of this this tensor.""" return 0 + def nd_iter(self): """Return an nd-indexed iterator of contained objects.""" yield ((0,), self) + def __iter__(self): """Return an iterator on unique scalar objects.""" return (self,).__iter__() + def __tuple__(self): """ Fix hysop.tools/type.to_tuple for FieldContainers, because __iter__ has been redefined. """ return (self,) + def __contains__(self, obj): """Check if a scalar object is contained in self.""" return (obj is self) + def __getitem__(self, slc): return self @@ -492,15 +511,19 @@ class DiscreteScalarFieldView(DiscreteScalarFieldViewContainerI, TaggedObjectVie def _get_field(self): """Return the continuous field associated to this discrete field.""" return self._dfield._field + def _get_dfield(self): """Get the discrete field on which the view is.""" return self._dfield + def _get_field(self): """Get the continuous field on which the view is.""" return self._dfield._field + def _get_topology_state(self): """Get the topology state of this view.""" return self._topology_state + def _get_is_read_only(self): """Return true if this view is a read-only.""" return self._topology_state.is_read_only @@ -508,12 +531,15 @@ class DiscreteScalarFieldView(DiscreteScalarFieldViewContainerI, TaggedObjectVie def _get_name(self): """Get the name of the discrete field.""" return self._dfield._name + def _get_pretty_name(self): """Get the name of the discrete field.""" return self._dfield._pretty_name + def _get_latex_name(self): """Get the latex name of the discrete field.""" return self._dfield._latex_name + def _get_var_name(self): """Get the latex name of the discrete field.""" return self._dfield._var_name @@ -521,6 +547,7 @@ class DiscreteScalarFieldView(DiscreteScalarFieldViewContainerI, TaggedObjectVie def _get_dtype(self): """Get the data type of the discrete field.""" return self._dfield._field.dtype + def _get_initial_values(self): """ Get the default initial values of this field @@ -531,15 +558,19 @@ class DiscreteScalarFieldView(DiscreteScalarFieldViewContainerI, TaggedObjectVie def _get_topology(self): """Return a topology view on which this discrete field is defined.""" return self._topology_view + def _get_domain(self): """Return a domain view on which this discrete field is defined.""" return self._topology_view._domain_view + def _get_mesh(self): """Return a mesh view on which the current process operate for this discrete field.""" return self._topology_view._mesh_view + def _get_backend(self): """Get the array backend used to allocate this discrete field data.""" return self._topology_view.backend + def _get_backend_kind(self): """Get the array backend kind used to allocate this discrete field data.""" return self._topology_view.backend.kind @@ -548,11 +579,11 @@ class DiscreteScalarFieldView(DiscreteScalarFieldViewContainerI, TaggedObjectVie """Check if two DiscreteScalarFieldViews are equivalent.""" if not isinstance(other, DiscreteScalarFieldView): return NotImplemented - eq = (self._dfield._topology is other._dfield._topology) - eq &= (self._dfield._field is other._dfield._field) - eq &= (self._dfield._name == other._dfield._name) + eq = (self._dfield._topology is other._dfield._topology) + eq &= (self._dfield._field is other._dfield._field) + eq &= (self._dfield._name == other._dfield._name) eq &= (self._dfield._pretty_name == other._dfield._pretty_name) - eq &= (self._topology_state == other._topology_state) + eq &= (self._topology_state == other._topology_state) if invert: return not eq else: @@ -564,13 +595,14 @@ class DiscreteScalarFieldView(DiscreteScalarFieldViewContainerI, TaggedObjectVie def _get_memory_request(self): """Get memory request that should be allocated for this TmpCartesianDiscreteField.""" return getattr(self._dfield, '_memory_request', None) + def _get_memory_request_id(self): """Get memory request id that should be allocated for this TmpCartesianDiscreteField.""" return getattr(self._dfield, '_memory_request_id', None) def __hash__(self): - h = id(self._dfield._topology) - h ^= id(self._dfield._field) + h = id(self._dfield._topology) + h ^= id(self._dfield._field) h ^= hash(self._dfield._name) h ^= hash(self._dfield._pretty_name) h ^= hash(self._topology_state) @@ -579,6 +611,7 @@ class DiscreteScalarFieldView(DiscreteScalarFieldViewContainerI, TaggedObjectVie @property def symbol(self): return self._dfield._symbol + @property def s(self): return self._dfield._symbol @@ -588,8 +621,8 @@ class DiscreteScalarFieldView(DiscreteScalarFieldViewContainerI, TaggedObjectVie topology_state = property(_get_topology_state) is_read_only = property(_get_is_read_only) - name = property(_get_name) - pretty_name = property(_get_pretty_name) + name = property(_get_name) + pretty_name = property(_get_pretty_name) latex_name = property(_get_latex_name) var_name = property(_get_var_name) @@ -597,10 +630,10 @@ class DiscreteScalarFieldView(DiscreteScalarFieldViewContainerI, TaggedObjectVie initial_values = property(_get_initial_values) topology = property(_get_topology) - backend = property(_get_backend) - backend_kind = property(_get_backend_kind) - domain = property(_get_domain) - mesh = property(_get_mesh) + backend = property(_get_backend) + backend_kind = property(_get_backend_kind) + domain = property(_get_domain) + mesh = property(_get_mesh) memory_request = property(_get_memory_request) memory_request_id = property(_get_memory_request_id) @@ -626,9 +659,9 @@ class DiscreteScalarField(NamedScalarContainerI, TaggedObject, metaclass=ABCMeta @debug def __init__(self, field, topology, register_discrete_field=True, - name=None, pretty_name=None, - var_name=None, latex_name=None, - **kwds): + name=None, pretty_name=None, + var_name=None, latex_name=None, + **kwds): super(DiscreteScalarField, self).__init__(name=name, pretty_name=pretty_name, var_name=var_name, latex_name=latex_name, tag_prefix='df', **kwds) @@ -668,33 +701,32 @@ class DiscreteScalarField(NamedScalarContainerI, TaggedObject, metaclass=ABCMeta check_instance(name, str, allow_none=True) check_instance(pretty_name, str, allow_none=True) - _name, _pretty_name, _var_name, _latex_name = \ - cls.format_discrete_names(field.name, - field.pretty_name, - field.var_name, - field.latex_name, - topology) + cls.format_discrete_names(field.name, + field.pretty_name, + field.var_name, + field.latex_name, + topology) pretty_name = first_not_None(pretty_name, name, _pretty_name) - var_name = first_not_None(var_name, name, _var_name) - latex_name = first_not_None(latex_name, name, _latex_name) - name = first_not_None(name, _name) + var_name = first_not_None(var_name, name, _var_name) + latex_name = first_not_None(latex_name, name, _latex_name) + name = first_not_None(name, _name) obj = super(DiscreteScalarField, cls).__new__(cls, name=name, pretty_name=pretty_name, - var_name=var_name, latex_name=latex_name, - tag_prefix='df', **kwds) + var_name=var_name, latex_name=latex_name, + tag_prefix='df', **kwds) assert isinstance(obj, DiscreteScalarFieldView), 'DiscreteScalarFieldView not inherited.' if (field._domain is not topology._domain): - msg='Field domain {} and topology domain {} do not match.' - msg=msg.format(field.domain.full_tag, topology.domain.full_tag) + msg = 'Field domain {} and topology domain {} do not match.' + msg = msg.format(field.domain.full_tag, topology.domain.full_tag) raise ValueError(msg) if register_discrete_field: if (topology in field.discrete_fields): - msg='Field {} was already discretized on topology {}.'.format( - field.name, topology.tag) + msg = 'Field {} was already discretized on topology {}.'.format( + field.name, topology.tag) raise RuntimeError(msg) field.discrete_fields[topology] = obj @@ -709,18 +741,18 @@ class DiscreteScalarField(NamedScalarContainerI, TaggedObject, metaclass=ABCMeta from hysop.tools.sympy_utils import subscript if (topology is None): # Tensor discrete field names (topology is not unique) - name = '{}*'.format(name) + name = '{}*'.format(name) pretty_name = '{}*'.format(pretty_name) - latex_name = '{}'.format(latex_name) - var_name = None + latex_name = '{}'.format(latex_name) + var_name = None else: # Scalar discrete field names - name = '{}_t{}'.format(name, topology.id) + name = '{}_t{}'.format(name, topology.id) pretty_name = '{}.{}{}'.format(pretty_name, - 'ₜ', - subscript(topology.id)) - var_name = var_name + '_t{}'.format(topology.id) - latex_name = latex_name + '.t_{{{}}}'.format(0) + 'ₜ', + subscript(topology.id)) + var_name = var_name + '_t{}'.format(topology.id) + latex_name = latex_name + '.t_{{{}}}'.format(0) return (name, pretty_name, var_name, latex_name) @@ -748,11 +780,11 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine return True def __init__(self, field, dfields, name=None, - pretty_name=None, latex_name=None, **kwds): + pretty_name=None, latex_name=None, **kwds): super(DiscreteTensorField, self).__init__(name=name, - pretty_name=pretty_name, latex_name=latex_name, - tag_prefix='tdf', tagged_cls=DiscreteTensorField, - contained_objects=dfields, **kwds) + pretty_name=pretty_name, latex_name=latex_name, + tag_prefix='tdf', tagged_cls=DiscreteTensorField, + contained_objects=dfields, **kwds) def __new__(cls, field, dfields, name=None, pretty_name=None, latex_name=None, **kwds): @@ -768,16 +800,16 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine **kwds) _name, _pretty_name, _, _latex_name = DiscreteScalarField.format_discrete_names( - field.name, field.pretty_name, None, field.latex_name, None) - name = first_not_None(name, _name) + field.name, field.pretty_name, None, field.latex_name, None) + name = first_not_None(name, _name) pretty_name = first_not_None(pretty_name, _pretty_name) - latex_name = first_not_None(latex_name, _latex_name) + latex_name = first_not_None(latex_name, _latex_name) obj = super(DiscreteTensorField, cls).__new__(cls, name=name, - pretty_name=pretty_name, latex_name=latex_name, - tag_prefix='tdf', tagged_cls=DiscreteTensorField, - contained_objects=dfields, **kwds) - obj._field = field + pretty_name=pretty_name, latex_name=latex_name, + tag_prefix='tdf', tagged_cls=DiscreteTensorField, + contained_objects=dfields, **kwds) + obj._field = field obj._dfields = dfields obj._clone_id = 0 @@ -791,7 +823,7 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine def determine_tensor_cls(cls, dfields): """Determine the Tensor container best suited for contained dfields.""" from hysop.fields.cartesian_discrete_field import CartesianDiscreteScalarFieldView, \ - CartesianDiscreteTensorField + CartesianDiscreteTensorField if isinstance(dfields, npw.ndarray): dfields = tuple(dfields.ravel().tolist()) check_instance(dfields, tuple, values=DiscreteScalarFieldView) @@ -807,7 +839,7 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine from a list of discrete fields and a shape. """ dfields = to_tuple(dfields) - shape = to_tuple(shape) + shape = to_tuple(shape) check_instance(dfields, tuple, values=(DiscreteScalarFieldView,), minsize=1) check_instance(shape, tuple, values=int) @@ -815,7 +847,7 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine cls._check_dfields(*dfields) fields = tuple(dfield._dfield._field for dfield in dfields) - field = TensorField.from_fields(name=name, pretty_name=pretty_name, + field = TensorField.from_fields(name=name, pretty_name=pretty_name, fields=fields, shape=shape) dfields = npw.asarray(dfields, dtype=object).reshape(shape) @@ -835,15 +867,15 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine dfields = tuple(dfields.ravel().tolist()) return cls.from_dfields(dfields=dfields, shape=shape, - name=name, pretty_name=pretty_name, **kwds) + name=name, pretty_name=pretty_name, **kwds) @classmethod def _check_dfields(cls, *dfields): """Check that at least one field is specified.""" dfield0 = first_not_None(*dfields) if (dfield0 is None): - msg='Tensor discrete field {} should at least contain a valid DiscreteScalarField.' - msg=msg.format(name) + msg = 'Tensor discrete field {} should at least contain a valid DiscreteScalarField.' + msg = msg.format(dfield0.name) raise ValueError(msg) def _check_names(self): @@ -851,17 +883,17 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine names = {} pnames = {} for dfield in self: - name = dfield.name + name = dfield.name pname = dfield.pretty_name if (name in names) and (names[name] != dfield): - msg='Name {} was already used by another discrete field.' - msg=msg.format(name) + msg = 'Name {} was already used by another discrete field.' + msg = msg.format(name) raise ValueError(msg) - if (pname in pnames) and (pretty_name[name] != dfield): - msg='Name {} was already used by another discrete field.' - msg=msg.format(pname) + if (pname in pnames) and (pnames[pname] != dfield): + msg = 'Name {} was already used by another discrete field.' + msg = msg.format(pname) raise ValueError(msg) - names[name] = dfield + names[name] = dfield pnames[name] = dfield def discrete_field_views(self): @@ -895,8 +927,8 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine def copy(self, from_dfield, **kwds): """Fill this discrete field with values from another one.""" if isinstance(from_dfield, tuple): - assert len(from_dfield)==self.nb_components - for (dfv,src) in zip(self.discrete_field_views(), from_dfield): + assert len(from_dfield) == self.nb_components + for (dfv, src) in zip(self.discrete_field_views(), from_dfield): dfv.copy(from_dfield=src) else: check_instance(from_dfield, DiscreteTensorField) @@ -906,7 +938,7 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine return self def clone(self, tstate=None, - name=None, pretty_name=None, **kwds): + name=None, pretty_name=None, **kwds): """ Create a new temporary DiscreteScalarField container and allocate it like the current object, with possibly a different topology state. @@ -923,7 +955,7 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine dfields[idx] = dfield.clone(tstate=tstate, **kwds) self._clone_id += 1 return self.from_dfield_array(name=name, pretty_name=pretty_name, dfields=dfields, - **kwds) + **kwds) def tmp_dfield_like(self, name, pretty_name=None, **kwds): r""" @@ -938,13 +970,13 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine for (idx, dfield) in self.nd_iter(): _name = TensorField.default_name_formatter(basename=name, idx=idx) _pretty_name = TensorField.default_pretty_name_formatter( - basename=pretty_name, idx=idx) + basename=pretty_name, idx=idx) _name, _pretty_name, _var_name, _latex_name = \ - DiscreteScalarField.format_discrete_names(_name, _pretty_name, - _name, self.latex_name, dfield.topology) + DiscreteScalarField.format_discrete_names(_name, _pretty_name, + _name, self.latex_name, dfield.topology) (dfield, request, request_id) = dfield.tmp_dfield_like(name=_name, - pretty_name=_pretty_name, - **kwds) + pretty_name=_pretty_name, + **kwds) requests.push_mem_request(request_id, request) dfields[idx] = dfield @@ -968,7 +1000,7 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine check_instance(other, DiscreteTensorField) assert npw.array_equal(self.shape, other.shape) eq = all(dfield.match(other[idx], invert=False) - for (idx, dfield) in self.nd_iter()) + for (idx, dfield) in self.nd_iter()) eq &= npw.array_equal(self.shape, other.shape) eq &= (self._name == other._name) eq &= (self._pretty_name == other._pretty_name) @@ -978,7 +1010,7 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine return eq def __hash__(self): - h = hash(self._name) + h = hash(self._name) h ^= hash(self._pretty_name) h ^= hash(self.shape) for (_, dfield) in self.nd_iter(): @@ -1027,14 +1059,14 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine def short_description(self): """Short description of this discrete field container.""" - s = '{}[name={}, pname={}, shape={}, nb_components={}]' - s = s.format(self.full_tag, self.name, self.pretty_name, - self.shape, self.nb_components) + s = '{}[name={}, pname={}, shape={}, nb_components={}]' + s = s.format(self.full_tag, self.name, self.pretty_name, + self.shape, self.nb_components) return s def long_description(self): """Long description of this discrete field container.""" - s='''\ + s = '''\ {} *name: {} *pname: {} @@ -1044,7 +1076,7 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine '''.format(self.full_tag, self.name, self.pretty_name, self.shape, self.nb_components) - s+=' '+'\n '.join(str(self.symbol).split('\n')) + s += ' '+'\n '.join(str(self.symbol).split('\n')) return s def common_dtype(self): @@ -1064,7 +1096,7 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine return integrals def exchange_ghosts(self, build_exchanger=False, build_launcher=False, - evt=None, **kwds): + evt=None, **kwds): """ Exchange ghosts using cached ghost exchangers which are built at first use. ie. Exchange every ghosts components of self.data using current topology state. @@ -1077,8 +1109,8 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine all_none = True for (idx, dfield) in self.nd_iter(): ge = dfield.exchange_ghosts( - build_launcher=False, build_exchanger=True, - **kwds) + build_launcher=False, build_exchanger=True, + **kwds) all_none &= (ge is None) ghost_exchangers += ge if all_none: @@ -1094,7 +1126,7 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine evt = first_not_None(_evt, evt) return evt - def accumulate_ghosts(self, build_launcher=False, build_exchanger=False, **kwds): + def accumulate_ghosts(self, build_launcher=False, build_exchanger=False, evt=None, **kwds): """ Exchange ghosts using cached ghost exchangers which are built at first use. ie. Exchange every ghosts components of self.data using current topology state. @@ -1107,8 +1139,8 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine all_none = True for (idx, dfield) in self.nd_iter(): ge = dfield.accumulate_ghosts( - build_launcher=False, build_exchanger=True, - **kwds) + build_launcher=False, build_exchanger=True, + **kwds) all_none &= (ge is None) ghost_exchangers += ge if all_none: @@ -1152,46 +1184,52 @@ class DiscreteTensorField(NamedTensorContainerI, DiscreteScalarFieldViewContaine def _get_data(self): return tuple(dfield.sdata for dfield in self.discrete_field_views()) + def _set_data(self, copy_data): dfields = self.discrete_field_views() - if (len(dfields)==1) and isinstance(copy_data, npw.ndarray): + if (len(dfields) == 1) and isinstance(copy_data, npw.ndarray): copy_data = (copy_data,) check_instance(copy_data, tuple, size=len(dfields)) for (dfield, data) in zip(dfields, copy_data): dfield._set_data(data) + def _get_buffers(self): return tuple(dfield.sbuffer for dfield in self.discrete_field_views()) def _get_sdata(self): self._raise_sdata() + def _set_sdata(self, copy_data): self._raise_sdata() + def _get_sbuffer(self): self._raise_sdata() + def _raise_sdata(self): - msg='sdata and sbuffer are only attributes of ScalarFields, ' - msg+='use data or buffers instead.' + msg = 'sdata and sbuffer are only attributes of ScalarFields, ' + msg += 'use data or buffers instead.' raise RuntimeError(msg) - data = property(_get_data, _set_data) - buffers = property(_get_buffers) - sdata = property(_get_sdata, _set_sdata) - sbuffer = property(_get_sbuffer) + data = property(_get_data, _set_data) + buffers = property(_get_buffers) + sdata = property(_get_sdata, _set_sdata) + sbuffer = property(_get_sbuffer) def initialize(self, *args, **kwds): - msg='This method is only defined for specific topologies ' - msg+='(see CartesianDiscreteTensorField).' + msg = 'This method is only defined for specific topologies ' + msg += '(see CartesianDiscreteTensorField).' raise RuntimeError(msg) def norm(self, *args, **kwds): - msg='This method is only defined for specific topologies ' - msg+='(see CartesianDiscreteTensorField).' + msg = 'This method is only defined for specific topologies ' + msg += '(see CartesianDiscreteTensorField).' raise RuntimeError(msg) def distance(self, *args, **kwds): - msg='This method is only defined for specific topologies ' - msg+='(see CartesianDiscreteTensorField).' + msg = 'This method is only defined for specific topologies ' + msg += '(see CartesianDiscreteTensorField).' raise RuntimeError(msg) + DiscreteField = (DiscreteScalarField, DiscreteTensorField) """A DiscreteField is either of DiscreteScalarField or a DiscreteTensorField""" diff --git a/hysop/numerics/fft/fft.py b/hysop/numerics/fft/fft.py index c54495669..f08bc2044 100644 --- a/hysop/numerics/fft/fft.py +++ b/hysop/numerics/fft/fft.py @@ -37,7 +37,7 @@ def is_byte_aligned(array): elif isinstance(array, (OpenClArray, clArray.Array)): return (array.offset == 0) else: - msg='Unknown array type {}.'.format(type(array)) + msg = 'Unknown array type {}.'.format(type(array)) raise TypeError(msg) @@ -58,11 +58,11 @@ def mk_view(ndim, axis, *args, **kwds): on all axis but the one specified by 'axis' which is replaced by slice(*args) if len(args)>1 else args[0]. """ - default = kwds.pop('default', slice(None,None,None)) + default = kwds.pop('default', slice(None, None, None)) assert args, 'Need at least one arg !' assert not kwds, 'Unknown keyword arguments: {}'.format(kwds.keys()) view = [default]*ndim - if len(args)==1: + if len(args) == 1: view[axis] = args[0] else: view[axis] = slice(*args) @@ -131,7 +131,7 @@ class FFTPlanI(object, metaclass=ABCMeta): Method that has to be called before any call to execute. """ if self._setup: - msg='Plan was already setup...' + msg = 'Plan was already setup...' raise RuntimeError(msg) self._setup = True return self @@ -151,7 +151,6 @@ class FFTPlanI(object, metaclass=ABCMeta): assert not self._allocated self._allocated = True - @abstractmethod def execute(self): """ @@ -164,8 +163,8 @@ class FFTPlanI(object, metaclass=ABCMeta): Apply the FFT plan to possibly new input or output arrays. """ if (a is not None) or (out is not None): - msg='New array execute is not available for FFT backend {}.' - msg=msg.format(type(self).__name__) + msg = 'New array execute is not available for FFT backend {}.' + msg = msg.format(type(self).__name__) raise RuntimeError(msg) self.execute(**kwds) @@ -265,7 +264,7 @@ class FFTI(object, metaclass=ABCMeta): @classmethod def default_interface_from_backend(cls, backend, - enable_opencl_host_buffer_mapping, **kwds): + enable_opencl_host_buffer_mapping, **kwds): check_instance(backend, ArrayBackend) if (backend.kind is Backend.HOST): from hysop.numerics.fft.host_fft import HostFFTI @@ -279,26 +278,26 @@ class FFTI(object, metaclass=ABCMeta): from hysop.numerics.fft.opencl_fft import OpenClFFTI return OpenClFFTI.default_interface(cl_env=backend.cl_env, **kwds) else: - msg='Unknown backend kind {}.'.format(backend.kind) + msg = 'Unknown backend kind {}.'.format(backend.kind) def check_backend(self, backend, - enable_opencl_host_buffer_mapping): + enable_opencl_host_buffer_mapping): check_instance(backend, ArrayBackend) if enable_opencl_host_buffer_mapping: if (self.backend is not backend.host_array_backend): - msg='Host array backend mismatch {} vs {}.' - msg=msg.format(self.backend, backend) + msg = 'Host array backend mismatch {} vs {}.' + msg = msg.format(self.backend, backend) raise RuntimeError(msg) else: if (self.backend is not backend): - msg='Backend mismatch {} vs {}.' - msg=msg.format(self.backend, backend) + msg = 'Backend mismatch {} vs {}.' + msg = msg.format(self.backend, backend) raise RuntimeError(msg) def get_transform(self, transform): check_instance(transform, TransformType) if (transform not in self.__transform2fn): - msg='Unknown transform type {}.'.format(transform) + msg = 'Unknown transform type {}.'.format(transform) raise NotImplementedError(transform) (fname, fkwds) = self.__transform2fn[transform] fn = getattr(self, fname) @@ -307,8 +306,8 @@ class FFTI(object, metaclass=ABCMeta): return fn def __init__(self, backend, - warn_on_allocation=True, - error_on_allocation=False): + warn_on_allocation=True, + error_on_allocation=False): """Initializes the interface and default supported real and complex types.""" from hysop.core.arrays.array_backend import ArrayBackend check_instance(backend, ArrayBackend) @@ -317,11 +316,11 @@ class FFTI(object, metaclass=ABCMeta): self.supported_ftypes = (np.float32, np.float64) self.supported_ctypes = (np.complex64, np.complex128) - self.supported_cosine_transforms = (1,2,3) - self.supported_sine_transforms = (1,2,3) + self.supported_cosine_transforms = (1, 2, 3) + self.supported_sine_transforms = (1, 2, 3) self.backend = backend - self.warn_on_allocation = warn_on_allocation + self.warn_on_allocation = warn_on_allocation self.error_on_allocation = error_on_allocation def allocate_output(self, out, shape, dtype): @@ -329,8 +328,8 @@ class FFTI(object, metaclass=ABCMeta): if (out is None): if self.warn_on_allocation or self.error_on_allocation: nbytes = np.prod(shape, dtype=np.int64)*dtype.itemsize - msg='FftwFFT: allocating aligned output array of size {}.' - msg=msg.format(bytes2str(nbytes)) + msg = 'FftwFFT: allocating aligned output array of size {}.' + msg = msg.format(bytes2str(nbytes)) if self.error_on_allocation: raise RuntimeError(msg) else: @@ -344,8 +343,8 @@ class FFTI(object, metaclass=ABCMeta): @classmethod def default_interface(cls, **kwds): """Get the default FFT interface.""" - msg='{}.default_interface() has not been implemented yet !' - msg=msg.format(cls.__name__) + msg = '{}.default_interface() has not been implemented yet !' + msg = msg.format(cls.__name__) raise NotImplementedError(msg) def allocate_plans(self, op, plans, tmp_buffer=None): @@ -353,27 +352,28 @@ class FFTI(object, metaclass=ABCMeta): backend = self.backend tmp_size = max(plan.required_buffer_size for plan in plans) - if (tmp_size>0): - msg='Operator {}: Allocating an additional {} temporary buffer for FFT backend {}.' - msg=msg.format(op.pretty_name, bytes2str(tmp_size), self.__class__.__name__) + if (tmp_size > 0): + msg = 'Operator {}: Allocating an additional {} temporary buffer for FFT backend {}.' + msg = msg.format(op.pretty_name, bytes2str(tmp_size), self.__class__.__name__) if (tmp_buffer is not None): assert tmp_buffer.nbytes >= tmp_size else: if self.error_on_allocation: raise RuntimeError(msg) elif self.warn_on_allocation: + from .gpyfft_fft import HysopGpyFftWarning warnings.warn(msg, HysopGpyFftWarning) else: vprint(msg) tmp_buffer = backend.empty(shape=(tmp_size), dtype=np.uint8) for plan in plans: if (plan.required_buffer_size > tmp_buffer.nbytes): - msg='\nFATAL ERROR: Failed to allocate temporary buffer for clFFT.' - msg+='\n => clFFT expected {} bytes but only {} bytes have been ' - msg+='allocated.\n' - msg=msg.format(plan.required_buffer_size, tmp_buffer.nbytes) + msg = '\nFATAL ERROR: Failed to allocate temporary buffer for clFFT.' + msg += '\n => clFFT expected {} bytes but only {} bytes have been ' + msg += 'allocated.\n' + msg = msg.format(plan.required_buffer_size, tmp_buffer.nbytes) raise RuntimeError(msg) - elif (plan.required_buffer_size>0): + elif (plan.required_buffer_size > 0): buf = tmp_buffer[:plan.required_buffer_size] plan.allocate(buf=buf) else: @@ -424,7 +424,6 @@ class FFTI(object, metaclass=ABCMeta): assert np.array_equal(a.shape, out.shape) return (a.shape, a.dtype) - @abstractmethod def ifft(self, a, out, axis=-1, **kwds): """ @@ -450,7 +449,6 @@ class FFTI(object, metaclass=ABCMeta): assert np.array_equal(a.shape, out.shape) return (a.shape, a.dtype, a.shape[axis]) - @abstractmethod def rfft(self, a, out, axis=-1, **kwds): """ @@ -498,7 +496,6 @@ class FFTI(object, metaclass=ABCMeta): assert np.array_equal(out.shape, cshape) return (cshape, ctype) - @abstractmethod def irfft(self, a, out, n=None, axis=-1, **kwds): """ @@ -531,13 +528,13 @@ class FFTI(object, metaclass=ABCMeta): (shape, dtype, logical_size) of the output array determined from the input array, out and n. """ - assert a.dtype in self.supported_ctypes + assert a.dtype in self.supported_ctypes cshape = a.shape - rtype = complex_to_float_dtype(a.dtype) + rtype = complex_to_float_dtype(a.dtype) rshape_even, rshape_odd = list(a.shape), list(a.shape) rshape_even[axis] = 2*(cshape[axis]-1) - rshape_odd[axis] = 2*(cshape[axis]-1) + 1 + rshape_odd[axis] = 2*(cshape[axis]-1) + 1 if (out is not None): assert out.dtype in self.supported_ftypes @@ -545,16 +542,16 @@ class FFTI(object, metaclass=ABCMeta): ns = out.shape[axis] if (n is not None): - assert ns==n, 'output shape mismatch with specified transformed output size.' + assert ns == n, 'output shape mismatch with specified transformed output size.' else: n = ns - if (n%2==0): + if (n % 2 == 0): assert np.array_equal(out.shape, rshape_even) else: assert np.array_equal(out.shape, rshape_odd) - if (n is None) or (n%2==0): + if (n is None) or (n % 2 == 0): rshape = rshape_even n = rshape[axis] else: @@ -612,9 +609,9 @@ class FFTI(object, metaclass=ABCMeta): (shape, dtype, inverse_type, logical_size) of the output array determined from the input array. """ - itype = [1,3,2,4][type-1] + itype = [1, 3, 2, 4][type-1] n = a.shape[axis] - N = 2*(n - (itype==1)) + N = 2*(n - (itype == 1)) logical_size = N assert a.dtype in self.supported_ftypes, a.dtype assert itype in self.supported_cosine_transforms, self.supported_cosine_transforms @@ -670,9 +667,9 @@ class FFTI(object, metaclass=ABCMeta): (shape, dtype, inverse_type, logical_size) of the output array determined from the input array. """ - itype = [1,3,2,4][type-1] + itype = [1, 3, 2, 4][type-1] n = a.shape[axis] - N = 2*(n + (itype==1)) + N = 2*(n + (itype == 1)) logical_size = N assert a.dtype in self.supported_ftypes, a.dtype assert type in self.supported_sine_transforms, self.supported_sine_transforms @@ -711,23 +708,23 @@ class FFTI(object, metaclass=ABCMeta): """Plan to compute energy from src to energy.""" assert src.ndim == len(transforms) assert dst.ndim == 1 - N = tuple(int(_) for _ in src.shape) - K2 = () + N = tuple(int(_) for _ in src.shape) + K2 = () NS2 = () C2C = () R2C = 0 S = 1.0 - assert len(fshape)==len(N)==len(transforms) + assert len(fshape) == len(N) == len(transforms) for (Fi, Ni, Ti) in zip(fshape, N, transforms): c2c = int(STU.is_C2C(Ti)) r2c = int(STU.is_R2C(Ti) or STU.is_C2R(Ti)) - Ki = Ni//2 if c2c else Ni-1 + Ki = Ni//2 if c2c else Ni-1 if r2c: Si = Fi/2.0 else: Si = Fi S *= Si - K2 += (Ki**2,) + K2 += (Ki**2,) NS2 += ((Ni+1)//2,) C2C += (c2c,) R2C |= r2c @@ -735,7 +732,6 @@ class FFTI(object, metaclass=ABCMeta): # for C2C we need to check j = (i<(N+1)//2 ? i : N-i) max_wavenumber = int(round(sum(K2)**0.5, 0)) - msg='Destination buffer should have size {} but has size {}.'.format(max_wavenumber+1, dst.size) + msg = 'Destination buffer should have size {} but has size {}.'.format(max_wavenumber+1, dst.size) assert (dst.size == max_wavenumber+1), msg return (N, NS2, C2C, R2C, S) - diff --git a/hysop/numerics/fft/gpyfft_fft.py b/hysop/numerics/fft/gpyfft_fft.py index efa7072b5..ef5e285a7 100644 --- a/hysop/numerics/fft/gpyfft_fft.py +++ b/hysop/numerics/fft/gpyfft_fft.py @@ -4,17 +4,19 @@ FFT iterface for fast Fourier Transforms using CLFFT backend (using gpyfft). :class:`~hysop.numerics.GpyFFTPlan` """ -import warnings, struct, primefac +import warnings +import struct +import primefac import numpy as np from abc import abstractmethod from gpyfft.fft import gfft, GFFT from hysop import __KERNEL_DEBUG__, __TRACE_KERNELS__, __VERBOSE__ from hysop.numerics.fft.fft import HysopFFTDataLayoutError, \ - mk_shape, float_to_complex_dtype, complex_to_float_dtype + mk_shape, float_to_complex_dtype, complex_to_float_dtype from hysop.numerics.fft.opencl_fft import OpenClFFTPlanI, OpenClFFTI, \ - OpenClArray, OpenClArrayBackend, \ - OpenClFFTQueue + OpenClArray, OpenClArrayBackend, \ + OpenClFFTQueue from hysop import vprint from hysop.constants import Precision @@ -34,43 +36,45 @@ from hysop.backend.device.opencl.opencl_kernel_launcher import trace_kernel, pro class HysopGpyFftWarning(HysopWarning): pass + # fix a weird bug in clfft/gpyfft keep_plans_ref = [] + class GpyFFTPlan(OpenClFFTPlanI): """ Build a clFFT plan using the gpyfft python interface. Emit warnings when transform output has an unaligned buffer offset. """ - DEBUG=False + DEBUG = False def __new__(cls, cl_env, queue, - in_array, out_array, axes, - scaling=None, scale_by_size=None, - fake_input=None, fake_output=None, - callback_kwds=None, - direction_forward=True, - hardcode_twiddles=False, - warn_on_unaligned_output_offset=True, - warn_on_allocation=True, - error_on_allocation=False, - **kwds): + in_array, out_array, axes, + scaling=None, scale_by_size=None, + fake_input=None, fake_output=None, + callback_kwds=None, + direction_forward=True, + hardcode_twiddles=False, + warn_on_unaligned_output_offset=True, + warn_on_allocation=True, + error_on_allocation=False, + **kwds): obj = super(GpyFFTPlan, cls).__new__(cls) keep_plans_ref.append(obj) return obj def __init__(self, cl_env, queue, - in_array, out_array, axes, - scaling=None, scale_by_size=None, - fake_input=None, fake_output=None, - callback_kwds=None, - direction_forward=True, - hardcode_twiddles=False, - warn_on_unaligned_output_offset=True, - warn_on_allocation=True, - error_on_allocation=False, - **kwds): + in_array, out_array, axes, + scaling=None, scale_by_size=None, + fake_input=None, fake_output=None, + callback_kwds=None, + direction_forward=True, + hardcode_twiddles=False, + warn_on_unaligned_output_offset=True, + warn_on_allocation=True, + error_on_allocation=False, + **kwds): """ Wrap gpyfft.FFT to allow more versatile callback settings and buffer allocations. @@ -119,7 +123,7 @@ class GpyFFTPlan(OpenClFFTPlanI): """ super(GpyFFTPlan, self).__init__(**kwds) - fake_input = first_not_None(fake_input, in_array) + fake_input = first_not_None(fake_input, in_array) fake_output = first_not_None(fake_output, out_array) callback_kwds = first_not_None(callback_kwds, {}) @@ -134,15 +138,15 @@ class GpyFFTPlan(OpenClFFTPlanI): self._queue = queue self.warn_on_unaligned_output_offset = warn_on_unaligned_output_offset - self.warn_on_allocation = warn_on_allocation + self.warn_on_allocation = warn_on_allocation self.error_on_allocation = error_on_allocation self.temp_buffer = None - self._setup = False - self._baked = False - self._allocated = False + self._setup = False + self._baked = False + self._allocated = False - self.in_array = in_array + self.in_array = in_array self.out_array = out_array axes = np.asarray(axes) @@ -156,17 +160,17 @@ class GpyFFTPlan(OpenClFFTPlanI): scale_by_size = first_not_None(scale_by_size, 1) self._setup_kwds = { - 'in_array': in_array, - 'out_array': out_array, - 'fake_input': fake_input, - 'fake_output': fake_output, - 'axes': axes, - 'scaling': scaling, - 'scale_by_size': scale_by_size, - 'direction_forward': direction_forward, - 'hardcode_twiddles': hardcode_twiddles, - 'callback_kwds': callback_kwds - } + 'in_array': in_array, + 'out_array': out_array, + 'fake_input': fake_input, + 'fake_output': fake_output, + 'axes': axes, + 'scaling': scaling, + 'scale_by_size': scale_by_size, + 'direction_forward': direction_forward, + 'hardcode_twiddles': hardcode_twiddles, + 'callback_kwds': callback_kwds + } def setup(self, queue=None): super(GpyFFTPlan, self).setup(queue=queue) @@ -177,24 +181,24 @@ class GpyFFTPlan(OpenClFFTPlanI): return self def setup_plan(self, in_array, out_array, - fake_input, fake_output, - axes, direction_forward, - scaling, scale_by_size, - hardcode_twiddles, callback_kwds): + fake_input, fake_output, + axes, direction_forward, + scaling, scale_by_size, + hardcode_twiddles, callback_kwds): # compute strides from fake arrays t_strides_in, t_distance_in, t_batchsize_in, t_shape_in, t_axes_in = \ - self.calculate_transform_strides(axes, fake_input) + self.calculate_transform_strides(axes, fake_input) t_strides_out, t_distance_out, t_batchsize_out, t_shape_out, t_axes_out = \ - self.calculate_transform_strides(axes, fake_output) + self.calculate_transform_strides(axes, fake_output) if not np.array_equal(t_axes_in, t_axes_out): - msg='Error finding transform axis (consider setting axes argument)' + msg = 'Error finding transform axis (consider setting axes argument)' raise RuntimeError(msg) if not np.array_equal(t_batchsize_in, t_batchsize_out): - msg='Batchsize mismatch: {} vs {}.' - msg=msg.format(t_batchsize_in, t_batchsize_out) + msg = 'Batchsize mismatch: {} vs {}.' + msg = msg.format(t_batchsize_in, t_batchsize_out) raise RuntimeError(msg) # Enforce no input and output overlap (unless inplace) @@ -204,7 +208,7 @@ class GpyFFTPlan(OpenClFFTPlanI): assert (in_array.offset + in_array.nbytes) < out_array.offset elif (in_array.offset > out_array.offset): assert (out_array.offset + out_array.nbytes) < in_array.offset - else: # in_array.offset == out_array.offset + else: # in_array.offset == out_array.offset t_inplace = True # Check data types @@ -221,91 +225,90 @@ class GpyFFTPlan(OpenClFFTPlanI): h_precision = Precision.DOUBLE fp = 'double' else: - msg='Unsupported precision {}.' - msg=msg.format(in_array.dtype) + msg = 'Unsupported precision {}.' + msg = msg.format(in_array.dtype) raise NotImplementedError(msg) for array in (out_array, fake_input, fake_output): if (array.dtype not in valid_precision_types): - msg='Incompatible precisions: Got {} but valid precisions are {} ' - msg+='based on input_array datatype which has been determined to be of kind {}.' - msg=msg.format(array.dtype, valid_precision_types, h_precision) + msg = 'Incompatible precisions: Got {} but valid precisions are {} ' + msg += 'based on input_array datatype which has been determined to be of kind {}.' + msg = msg.format(array.dtype, valid_precision_types, h_precision) raise RuntimeError(msg) # Determine transform layout and expected output shape and dtype - float_types = (np.float32, np.float64) + float_types = (np.float32, np.float64) complex_types = (np.complex64, np.complex128) axe0 = t_axes_in[0] if fake_input.dtype in float_types: - layout_in = gfft.CLFFT_REAL + layout_in = gfft.CLFFT_REAL layout_out = gfft.CLFFT_HERMITIAN_INTERLEAVED expected_output_shape = mk_shape(fake_input.shape, - axe0, fake_input.shape[axe0]//2 +1) + axe0, fake_input.shape[axe0]//2 + 1) expected_output_dtype = float_to_complex_dtype(fake_input.dtype) t_shape = t_shape_in elif fake_input.dtype in complex_types: if fake_output.dtype in float_types: - layout_in = gfft.CLFFT_HERMITIAN_INTERLEAVED + layout_in = gfft.CLFFT_HERMITIAN_INTERLEAVED layout_out = gfft.CLFFT_REAL expected_output_shape = mk_shape(fake_input.shape, - axe0, 2*(fake_input.shape[axe0]-1)) + axe0, 2*(fake_input.shape[axe0]-1)) expected_output_dtype = complex_to_float_dtype(fake_input.dtype) t_shape = t_shape_out elif fake_output.dtype in complex_types: - layout_in = gfft.CLFFT_COMPLEX_INTERLEAVED + layout_in = gfft.CLFFT_COMPLEX_INTERLEAVED layout_out = gfft.CLFFT_COMPLEX_INTERLEAVED expected_output_shape = fake_input.shape expected_output_dtype = fake_input.dtype t_shape = t_shape_in else: - msg='dtype {} is currently not handled.' - msg=msg.format(fake_output.dtype) + msg = 'dtype {} is currently not handled.' + msg = msg.format(fake_output.dtype) raise NotImplementedError(msg) else: - msg='dtype {} is currently not handled.' - msg=msg.format(fake_input.dtype) + msg = 'dtype {} is currently not handled.' + msg = msg.format(fake_input.dtype) raise NotImplementedError(msg) if (fake_output.dtype != expected_output_dtype): - msg='Output array dtype {} does not match expected dtype {}.' - msg=msg.format(fake_output.dtype, expected_output_dtype) + msg = 'Output array dtype {} does not match expected dtype {}.' + msg = msg.format(fake_output.dtype, expected_output_dtype) raise RuntimeError(msg) if not np.array_equal(fake_output.shape, expected_output_shape): - msg='Output array shape {} does not match expected shape {}.' - msg=msg.format(fake_output.shape, expected_output_shape) - if (layout_in == gfft.CLFFT_HERMITIAN_INTERLEAVED) and \ + msg = 'Output array shape {} does not match expected shape {}.' + msg = msg.format(fake_output.shape, expected_output_shape) + if (layout_in == gfft.CLFFT_HERMITIAN_INTERLEAVED) and \ (layout_out == gfft.CLFFT_REAL): expected_output_shape = mk_shape(fake_input.shape, - axe0, 2*(fake_input.shape[axe0]-1) + 1) + axe0, 2*(fake_input.shape[axe0]-1) + 1) if not np.array_equal(fake_output.shape, expected_output_shape): raise RuntimeError(msg) else: raise RuntimeError(msg) if t_inplace and ((layout_in is gfft.CLFFT_REAL) or (layout_out is gfft.CLFFT_REAL)): - assert ((in_array.strides[t_axes_in[0]] == in_array.dtype.itemsize) and \ + assert ((in_array.strides[t_axes_in[0]] == in_array.dtype.itemsize) and (out_array.strides[t_axes_in[0]] == out_array.dtype.itemsize)), \ - 'Inplace real transforms need stride 1 for first transform axis.' + 'Inplace real transforms need stride 1 for first transform axis.' self.check_transform_shape(t_shape) plan = GFFT.create_plan(self.context, t_shape[::-1]) - plan.inplace = t_inplace - plan.strides_in = t_strides_in[::-1] - plan.strides_out = t_strides_out[::-1] - plan.distances = (t_distance_in, t_distance_out) - plan.batch_size = t_batchsize_in - plan.precision = t_precision - plan.layouts = (layout_in, layout_out) + plan.inplace = t_inplace + plan.strides_in = t_strides_in[::-1] + plan.strides_out = t_strides_out[::-1] + plan.distances = (t_distance_in, t_distance_out) + plan.batch_size = t_batchsize_in + plan.precision = t_precision + plan.layouts = (layout_in, layout_out) if (scaling == 'DEFAULT'): pass elif (scaling is not None): - plan.scale_forward = scale - plan.scale_backward = scale + plan.scale_forward = scaling + plan.scale_backward = scaling else: - plan.scale_forward = 1.0 + plan.scale_forward = 1.0 plan.scale_backward = 1.0 - # last transformed axis real output array size N = out_array.shape[axes[-1]] @@ -324,9 +327,9 @@ class GpyFFTPlan(OpenClFFTPlanI): hardcode_twiddles=hardcode_twiddles, **callback_kwds) - self.plan = plan - self.in_data = in_data - self.out_data = out_data + self.plan = plan + self.in_data = in_data + self.out_data = out_data self.is_inplace = t_inplace self.direction_forward = direction_forward @@ -334,8 +337,8 @@ class GpyFFTPlan(OpenClFFTPlanI): def estrides(array): s = array.dtype.itemsize return tuple(x//s for x in array.strides) - msg=\ -''' + msg =\ + ''' ::CLFFT PLANNER DEBUG:: Input array: shape={}, dtype={}, strides={} elements, base_offset={} Output array: shape={}, dtype={}, strides={} elements, base_offset={} @@ -373,25 +376,25 @@ Pre callback source code: Post callback source code: {} '''.format(in_array.shape, in_array.dtype, estrides(in_array), in_array.offset, - out_array.shape, out_array.dtype, estrides(out_array), out_array.offset, - fake_input.shape, fake_input.dtype, estrides(fake_input), - fake_output.shape, fake_output.dtype, estrides(fake_output), - t_distance_in, t_distance_out, t_axes_in, t_axes_out, t_batchsize_in, t_batchsize_out, - t_shape_in, t_shape_out, t_strides_in, t_strides_out, - plan.inplace, None, plan.layouts[0], plan.layouts[1], - plan.shape, plan.strides_in, plan.strides_out, plan.batch_size, - plan.distances[0], plan.distances[1], - plan.scale_forward, plan.scale_backward, - self.pre_callback_src.decode(), self.post_callback_src.decode()) + out_array.shape, out_array.dtype, estrides(out_array), out_array.offset, + fake_input.shape, fake_input.dtype, estrides(fake_input), + fake_output.shape, fake_output.dtype, estrides(fake_output), + t_distance_in, t_distance_out, t_axes_in, t_axes_out, t_batchsize_in, t_batchsize_out, + t_shape_in, t_shape_out, t_strides_in, t_strides_out, + plan.inplace, None, plan.layouts[0], plan.layouts[1], + plan.shape, plan.strides_in, plan.strides_out, plan.batch_size, + plan.distances[0], plan.distances[1], + plan.scale_forward, plan.scale_backward, + self.pre_callback_src.decode(), self.post_callback_src.decode()) print(msg) if (scaling == 'DEFAULT'): pass elif (scaling is not None): - plan.scale_forward = scale - plan.scale_backward = scale + plan.scale_forward = scaling + plan.scale_backward = scaling else: - plan.scale_forward = 1.0 + plan.scale_forward = 1.0 plan.scale_backward = 1.0 # profiling info is delegated to this class, inform the KernelListLauncher @@ -399,43 +402,42 @@ Post callback source code: # custom apply msg self._apply_msg_template = ' fft_{}2{}_{}_{}_{{}}<<<>>>'.format( - 'C' if is_complex(in_array) else 'R', - 'C' if is_complex(out_array) else 'R', - 'forward' if direction_forward else 'backward', - self.__class__.__name__.replace('Gpy','').replace('Plan','_plan').replace('FFT','DFT')) - + 'C' if is_complex(in_array) else 'R', + 'C' if is_complex(out_array) else 'R', + 'forward' if direction_forward else 'backward', + self.__class__.__name__.replace('Gpy', '').replace('Plan', '_plan').replace('FFT', 'DFT')) def set_callbacks(self, plan, axes, N, - in_array, out_array, fake_input, fake_output, - layout_in, layout_out, **kwds): + in_array, out_array, fake_input, fake_output, + layout_in, layout_out, **kwds): """Set plan pre and post callbacks and return in and out array data opencl buffers.""" (in_data, in_fp, oip) = self.compute_input_array_offset(in_array, fake_input, axes) - if (layout_in == gfft.CLFFT_HERMITIAN_INTERLEAVED) and \ + if (layout_in == gfft.CLFFT_HERMITIAN_INTERLEAVED) and \ (layout_out == gfft.CLFFT_REAL): # ******************************************************************************** # CLFFT C2R BUGFIX # Force the zero and the Nyquist frequency of the input to be purely real. (pre_src, user_data) = self.pre_offset_callback_C2R(offset_input_pointer=oip, - in_fp=in_fp, N=N, **kwds) + in_fp=in_fp, N=N, **kwds) # ******************************************************************************** else: (pre_src, user_data) = self.pre_offset_callback(offset_input_pointer=oip, - in_fp=in_fp, N=N, **kwds) + in_fp=in_fp, N=N, **kwds) (out_data, out_fp, oop) = self.compute_output_array_offset(out_array, fake_output, - axes) + axes) (post_src, user_data) = self.post_offset_callback(offset_output_pointer=oop, - out_fp=out_fp, N=N, **kwds) + out_fp=out_fp, N=N, **kwds) # *********************************************************************************** # GPYFFT BUGFIX # Keep a reference to callback source code to prevent dangling const char* pointers. # Do not remove because clfft only get the pointer and gpyfft does not increase the # refcount of those strings, resulting in random code injection into the fft kernels. - pre_src = pre_src.encode('utf-8') + pre_src = pre_src.encode('utf-8') post_src = post_src.encode('utf-8') - self.pre_callback_src = pre_src + self.pre_callback_src = pre_src self.post_callback_src = post_src # *********************************************************************************** @@ -448,27 +450,27 @@ Post callback source code: @classmethod def check_transform_shape(self, shape): """Check that clFFT can handle the logical transform size.""" - valid_factors = {2,3,5,7,11,13} + valid_factors = {2, 3, 5, 7, 11, 13} for Ni in shape: - factors = tuple( primefac.primefac(int(Ni)) ) + factors = tuple(primefac.primefac(int(Ni))) invalid_factors = set(factors) - valid_factors if invalid_factors: factorization = ' * '.join('{}^{}'.format(factor, factors.count(factor)) - for factor in set(factors)) + for factor in set(factors)) candidates = ', '.join(str(vf) for vf in valid_factors) - msg ='\nInvalid transform shape {} for clFFT:' - msg+='\n {} = {}' - msg+='\nOnly {} prime factors are available.' - msg+='\n' - msg=msg.format(shape, Ni, factorization, candidates) + msg = '\nInvalid transform shape {} for clFFT:' + msg += '\n {} = {}' + msg += '\nOnly {} prime factors are available.' + msg += '\n' + msg = msg.format(shape, Ni, factorization, candidates) raise ValueError(msg) @classmethod def calculate_transform_strides(cls, taxes, array): """Redefine gpyfft.FFT.calculate_transform_strides""" - shape = np.asarray(array.shape, dtype=np.uint32) + shape = np.asarray(array.shape, dtype=np.uint32) strides = np.asarray(array.strides, dtype=np.uint32) - dtype = array.dtype + dtype = array.dtype # array dimension and transform dimension ndim = len(shape) @@ -476,7 +478,7 @@ Post callback source code: assert tdim <= ndim # transform axes and batch axes - taxes[taxes<0] += ndim + taxes[taxes < 0] += ndim baxes = np.asarray(tuple(a for a in range(ndim) if (a not in taxes)), dtype=np.uint32) # sort untransformed axes by strides. @@ -484,7 +486,7 @@ Post callback source code: # compute a list of collapsable axes: [ [x,y], [z] ] cal = [] # collaspsable axes list - cac = baxes[:1].tolist() # collaspsable axes candidates + cac = baxes[:1].tolist() # collaspsable axes candidates for a in baxes[1:]: if strides[a] == (strides[cac[-1]] * shape[cac[-1]]): cac.append(a) @@ -493,8 +495,8 @@ Post callback source code: cac = [a] cal.append(cac) - msg='Data layout not supported (only single non-transformed axis allowed)' - if (len(cal)!=1): + msg = 'Data layout not supported (only single non-transformed axis allowed)' + if (len(cal) != 1): raise HysopFFTDataLayoutError(msg) baxes = cal[0] @@ -507,58 +509,58 @@ Post callback source code: batchsize = np.prod(shape[baxes]) - t_shape = shape[taxes] + t_shape = shape[taxes] t_strides = strides[taxes]//dtype.itemsize return (tuple(t_strides), t_distance, batchsize, tuple(t_shape), tuple(taxes)) @classmethod def compute_input_array_offset(cls, real_input, fake_input, axes, - transform_offset='K', idx='k{}', batch_id='b', - void_ptr='input', casted_ptr='in'): + transform_offset='K', idx='k{}', batch_id='b', + void_ptr='input', casted_ptr='in'): - new_input = cls.extract_array(real_input) + new_input = cls.extract_array(real_input) input_offset = cls.get_array_offset(new_input, emit_warning=False) input_data = new_input.base_data - input_fp = dtype_to_ctype(new_input.dtype) + input_fp = dtype_to_ctype(new_input.dtype) offset_input_pointer = \ - cls.compute_pointer_offset(real_array=real_input, fake_array=fake_input, - axes=axes, base_offset=input_offset, - transform_offset=transform_offset, idx=idx, batch_id=batch_id, - fp='const '+input_fp, void_ptr=void_ptr, casted_ptr=casted_ptr, - is_input=True) + cls.compute_pointer_offset(real_array=real_input, fake_array=fake_input, + axes=axes, base_offset=input_offset, + transform_offset=transform_offset, idx=idx, batch_id=batch_id, + fp='const '+input_fp, void_ptr=void_ptr, casted_ptr=casted_ptr, + is_input=True) return (input_data, input_fp, offset_input_pointer) @classmethod def compute_output_array_offset(cls, real_output, fake_output, axes, - transform_offset='K', idx='k{}', batch_id='b', - void_ptr='output', casted_ptr='out'): + transform_offset='K', idx='k{}', batch_id='b', + void_ptr='output', casted_ptr='out'): - new_output = cls.extract_array(real_output) + new_output = cls.extract_array(real_output) output_offset = cls.get_array_offset(new_output, emit_warning=True) output_data = new_output.base_data - output_fp = dtype_to_ctype(new_output.dtype) + output_fp = dtype_to_ctype(new_output.dtype) offset_output_pointer = \ - cls.compute_pointer_offset(real_array=real_output, fake_array=fake_output, - axes=axes, base_offset=output_offset, - transform_offset=transform_offset, idx=idx, batch_id=batch_id, - fp=output_fp, void_ptr=void_ptr, casted_ptr=casted_ptr, - is_input=False) + cls.compute_pointer_offset(real_array=real_output, fake_array=fake_output, + axes=axes, base_offset=output_offset, + transform_offset=transform_offset, idx=idx, batch_id=batch_id, + fp=output_fp, void_ptr=void_ptr, casted_ptr=casted_ptr, + is_input=False) return (output_data, output_fp, offset_output_pointer) @classmethod def compute_pointer_offset(cls, real_array, fake_array, - axes, base_offset, - transform_offset, idx, batch_id, - fp, void_ptr, casted_ptr, - is_input): + axes, base_offset, + transform_offset, idx, batch_id, + fp, void_ptr, casted_ptr, + is_input): fake_strides, fake_distance, fake_batchsize, fake_shape, fake_axes = \ - cls.calculate_transform_strides(axes, fake_array) + cls.calculate_transform_strides(axes, fake_array) assert len(fake_shape) == len(fake_strides) <= 3 ndim = len(fake_shape) @@ -578,21 +580,21 @@ Post callback source code: if is_input: oip += ('if (b>={}) {{ return ({})(NAN); }};'.format(fake_batchsize, fp),) else: - oip += ('if (b>={}) {{ return; }};'.format(fake_batchsize, fp),) + oip += ('if (b>={}) {{ return ; }};'.format(fake_batchsize),) oip += ('{K} -= {b}*{D};'.format(K=K, b=b, D=D),) for i in range(ndim-1, -1, -1): Ki = idx.format('xyz'[i]) Si = S[i] oip += ('const uint {Ki} = {K}/{Si};'.format(Ki=Ki, K=K, Si=Si),) - if (i>0): + if (i > 0): oip += ('{K} -= {Ki}*{Si};'.format(K=K, Ki=Ki, Si=Si),) if (real_array is fake_array): offset = '{base_offset} + offset - {k}'.format(base_offset=base_offset, - K=K, k=k.format('x')) + K=K, k=k.format('x')) else: real_strides, real_distance, real_batchsize, real_shape, real_axes = \ - cls.calculate_transform_strides(axes, real_array) + cls.calculate_transform_strides(axes, real_array) assert fake_batchsize == real_batchsize assert np.array_equal(fake_axes, real_axes) assert len(real_shape) == len(real_strides) == ndim @@ -606,15 +608,15 @@ Post callback source code: offset = ' + '.join(real_offset) oip += ('__global {fp}* {cptr} = (__global {fp}*)({vptr}) + {offset};'.format( - cptr=cptr, vptr=vptr, - fp=fp, offset=offset),) + cptr=cptr, vptr=vptr, + fp=fp, offset=offset),) indent = ' '*12 offset_pointer = indent+'\n{}'.format(indent).join(oip) return offset_pointer @classmethod def extract_array(cls, array): - offset = (array.offset // array.dtype.itemsize) + offset = (array.offset // array.dtype.itemsize) alignment = (array.offset % array.dtype.itemsize) assert (alignment == 0), 'Wrong array alignment.' try: @@ -635,47 +637,49 @@ Post callback source code: dtype = array.dtype if (array.offset % dtype.itemsize) != 0: - msg='Unaligned array offset.' + msg = 'Unaligned array offset.' raise RuntimeError(msg) base_offset = (array.offset // dtype.itemsize) if emit_warning and (base_offset != 0): - msg='OpenCl array offset is not zero and will be injected into a clFFT pre or ' - msg+= 'post callback. This could entail bad results if this buffer is used as ' - msg+= 'an output: the beginning of this buffer may be used as a temporary ' - msg+= 'buffer during the transform before actual results are stored at the right ' - msg+= 'offset through the callback.' + msg = 'OpenCl array offset is not zero and will be injected into a clFFT pre or ' + msg += 'post callback. This could entail bad results if this buffer is used as ' + msg += 'an output: the beginning of this buffer may be used as a temporary ' + msg += 'buffer during the transform before actual results are stored at the right ' + msg += 'offset through the callback.' warnings.warn(msg, HysopGpyFftWarning) return base_offset def bake(self, queue=None): """Bake the plan.""" if self._baked: - msg='Plan was already baked.' + msg = 'Plan was already baked.' raise RuntimeError(msg) + def fmt_arg(name): return self._setup_kwds[name] + def fmt_array(name): arr = fmt_arg(name) return 'shape={:<16} strides={:<16} dtype={:<16}'.format( - str(arr.shape)+',', - str(arr.strides)+',', - str(arr.dtype)) - title=' Baking {} '.format(self.__class__.__name__) + str(arr.shape)+',', + str(arr.strides)+',', + str(arr.dtype)) + title = ' Baking {} '.format(self.__class__.__name__) msg = \ - ''' in_array: {} + ''' in_array: {} out_array: {} fake_input: {} fake_output: {} axes: {} direction_forward: {} hardcode twiddles: {}'''.format( - fmt_array('in_array'), - fmt_array('out_array'), - fmt_array('fake_input'), - fmt_array('fake_output'), - fmt_arg('axes'), - fmt_arg('direction_forward'), - fmt_arg('hardcode_twiddles')) + fmt_array('in_array'), + fmt_array('out_array'), + fmt_array('fake_input'), + fmt_array('fake_output'), + fmt_arg('axes'), + fmt_arg('direction_forward'), + fmt_arg('hardcode_twiddles')) if self.verbose: print() print(framed_str(title, msg, c='*')) @@ -687,14 +691,14 @@ Post callback source code: def allocate(self, buf=None): """Allocate plan extra memory, possibly with a custom buffer.""" if self._allocated: - msg='Plan was already allocated.' + msg = 'Plan was already allocated.' raise RuntimeError(msg) size = self.plan.temp_array_size - if (size>0): + if (size > 0): if (buf is None): if self.warn_on_allocation or self.error_on_allocation: - msg='Allocating temporary buffer of size {} for clFFT::{}.' - msg=msg.format(bytes2str(size), id(self)) + msg = 'Allocating temporary buffer of size {} for clFFT::{}.' + msg = msg.format(bytes2str(size), id(self)) if self.error_on_allocation: raise RuntimeError(msg) else: @@ -702,8 +706,8 @@ Post callback source code: buf = cl.Buffer(self.context, cl.mem_flags.READ_WRITE, size=size) self.temp_buffer = buf elif (buf.size != size): - msg='Buffer does not match required size: {} != {}' - msg=msg.format(buf.size, size) + msg = 'Buffer does not match required size: {} != {}' + msg = msg.format(buf.size, size) raise ValueError(msg) else: self.temp_buffer = buf.data @@ -712,9 +716,8 @@ Post callback source code: self._allocated = True return self - def profile(self, events): - for (i,evt) in enumerate(events): + for (i, evt) in enumerate(events): profile_kernel(None, evt, self._apply_msg_template.format(i)) evt.wait() return evt @@ -736,23 +739,23 @@ Post callback source code: if self.is_inplace: events = self.plan.enqueue_transform((queue,), - (in_data,), - direction_forward=direction_forward, - temp_buffer=self.temp_buffer, - wait_for_events=wait_for) + (in_data,), + direction_forward=direction_forward, + temp_buffer=self.temp_buffer, + wait_for_events=wait_for) else: - #print(self.in_array) - #print(self.out_array) + # print(self.in_array) + # print(self.out_array) events = self.plan.enqueue_transform((queue,), - (in_data,), (out_data), - direction_forward=direction_forward, - temp_buffer=self.temp_buffer, - wait_for_events=wait_for) + (in_data,), (out_data), + direction_forward=direction_forward, + temp_buffer=self.temp_buffer, + wait_for_events=wait_for) evt = self.profile(events) return evt def enqueue_arrays(self, *args, **kwds): - msg='Enqueue arrays is not supported yet.' + msg = 'Enqueue arrays is not supported yet.' raise NotImplementedError(msg) def execute(self, **kwds): @@ -786,7 +789,7 @@ Post callback source code: def pre_offset_callback(self, offset_input_pointer, in_fp, **kwds): """Default pre_offset_callback, just inject input array offset.""" callback = \ - '''{fp} pre_callback(const __global void* input, const uint offset, + '''{fp} pre_callback(const __global void* input, const uint offset, __global void* userdata) {{ {offset_input_pointer} return in[kx]; @@ -800,10 +803,10 @@ Post callback source code: in clfft for even C2R transform of dimension > 1). """ force_real_input = '(kx==0)' - if (N%2==0): # Nyquist freq + if (N % 2 == 0): # Nyquist freq force_real_input += '|| (kx=={n})'.format(n=N//2) callback = \ - '''{fp}2 pre_callback(const __global void* input, const uint offset, + '''{fp}2 pre_callback(const __global void* input, const uint offset, __global void* userdata) {{ {offset_input_pointer} if ({force_real_input}) {{ @@ -813,7 +816,7 @@ Post callback source code: return in[kx]; }} }}'''.format(fp=fp, force_real_input=force_real_input, - offset_input_pointer=offset_input_pointer) + offset_input_pointer=offset_input_pointer) return callback, None def post_offset_callback(self, offset_output_pointer, out_fp, S, **kwds): @@ -823,14 +826,13 @@ Post callback source code: transform or 1). """ callback = \ - '''void post_callback(__global void* output, const uint offset, + '''void post_callback(__global void* output, const uint offset, __global void* userdata, const {fp} R) {{ {offset_output_pointer} out[kx] = R / {S}; }}'''.format(fp=out_fp, offset_output_pointer=offset_output_pointer, S=S) return callback, None - @classmethod def fake_array(cls, shape, dtype, strides=None): """ @@ -864,7 +866,7 @@ Post callback source code: @classmethod def generate_twiddles(cls, name, base, count, typegen, fp, - hardcode_twiddles, idx='kx', Tvar='T'): + hardcode_twiddles, idx='kx', Tvar='T'): """ Generate twiddles as a string. OpenCl __constant static array: exp(base*k0) for k in 0..count @@ -876,7 +878,7 @@ Post callback source code: vals = ',\n'.join(base.format( typegen.dump(x.real), typegen.dump(x.imag), fp=fp) for x in E) twiddles = \ - ''' + ''' __constant const {fp}2 {name}[{N}] = {{ {vals} }}; @@ -898,9 +900,9 @@ class GpyR2RPlan(GpyFFTPlan): """ def __init__(self, in_array, out_array, - fake_input, fake_output, - scale_by_size, axes, - **kwds): + fake_input, fake_output, + scale_by_size, axes, + **kwds): """ Handmade R2R transforms rely on fake input and output that will never really be read or written. This is necessary because @@ -913,45 +915,45 @@ class GpyR2RPlan(GpyFFTPlan): computations to the real array sizes from the fake array indices. """ real_types = (np.float32, np.float64) - msg='Incompatible shapes {} vs {}.'.format(in_array.shape, out_array.shape) + msg = 'Incompatible shapes {} vs {}.'.format(in_array.shape, out_array.shape) assert np.array_equal(in_array.shape, out_array.shape), msg - msg='Incompatible dtypes {} vs {}.'.format(in_array.dtype, out_array.dtype) + msg = 'Incompatible dtypes {} vs {}.'.format(in_array.dtype, out_array.dtype) assert (in_array.dtype == out_array.dtype), msg - msg='Incompatible dtype {}, expected {}.'.format(in_array.dtype, real_types) + msg = 'Incompatible dtype {}, expected {}.'.format(in_array.dtype, real_types) assert (in_array.dtype in real_types), msg - msg='Fake input has not been set.' + msg = 'Fake input has not been set.' assert (fake_input is not None), msg - msg='Fake output has not been set.' + msg = 'Fake output has not been set.' assert (fake_output is not None), msg axis = self.check_r2r_axes(in_array, axes) axes = np.asarray([axis]) super(GpyR2RPlan, self).__init__(in_array=in_array, out_array=out_array, - fake_input=fake_input, fake_output=fake_output, - axes=axes, scale_by_size=scale_by_size, **kwds) + fake_input=fake_input, fake_output=fake_output, + axes=axes, scale_by_size=scale_by_size, **kwds) def setup_plan(self, **kwds): super(GpyR2RPlan, self).setup_plan(**kwds) if self.is_inplace: - msg='R2R transforms cannot be compute inplace on this backend.' + msg = 'R2R transforms cannot be compute inplace on this backend.' raise NotImplementedError(msg) @classmethod def prepare_r2r(cls, in_array, axes): """Return all the required variables to build fake arrays for a all R2R transforms.""" - axis = cls.check_r2r_axes(in_array, axes) - shape = in_array.shape - N = shape[axis] - dtype = in_array.dtype - ctype = float_to_complex_dtype(dtype) + axis = cls.check_r2r_axes(in_array, axes) + shape = in_array.shape + N = shape[axis] + dtype = in_array.dtype + ctype = float_to_complex_dtype(dtype) return (dtype, ctype, shape, axis, N) @classmethod def check_r2r_axes(cls, in_array, axes): """Check that only the last axis is transformed.""" axis = in_array.ndim - 1 - assert len(axes)==1 + assert len(axes) == 1 assert axes[0] in (-1, axis) return axis @@ -963,17 +965,17 @@ class GpyDCTIPlan(GpyR2RPlan): (dtype, ctype, shape, axis, N) = self.prepare_r2r(in_array, axes) rshape = mk_shape(shape, axis, 2*N-2) cshape = mk_shape(shape, axis, N) - fake_input = self.fake_array(shape=rshape, dtype=dtype) + fake_input = self.fake_array(shape=rshape, dtype=dtype) fake_output = self.fake_array(shape=cshape, dtype=ctype) super(GpyDCTIPlan, self).__init__(in_array=in_array, axes=axes, - fake_input=fake_input, fake_output=fake_output, **kwds) + fake_input=fake_input, fake_output=fake_output, **kwds) - #def __del__(self): + # def __del__(self): #print('\ndelete DCT-I plan {}'.format(id(self))) def pre_offset_callback(self, N, fp, offset_input_pointer, **kwds): pre = \ - '''{fp} pre_callback(const __global void* input, const uint offset, + '''{fp} pre_callback(const __global void* input, const uint offset, __global void* userdata) {{ {offset_input_pointer} {fp} ret; @@ -989,7 +991,7 @@ class GpyDCTIPlan(GpyR2RPlan): def post_offset_callback(self, fp, S, offset_output_pointer, **kwds): post = \ - '''void post_callback(__global void* output, const uint offset, + '''void post_callback(__global void* output, const uint offset, __global void* userdata, const {fp}2 R) {{ {offset_output_pointer} out[kx] = R.x/{S}; @@ -997,22 +999,20 @@ class GpyDCTIPlan(GpyR2RPlan): return post, None - - class GpyDCTIIPlan(GpyR2RPlan): def __init__(self, in_array, axes, **kwds): (dtype, ctype, shape, axis, N) = self.prepare_r2r(in_array, axes) rshape = mk_shape(shape, axis, N) cshape = mk_shape(shape, axis, N//2+1) - fake_input = self.fake_array(shape=rshape, dtype=dtype) + fake_input = self.fake_array(shape=rshape, dtype=dtype) fake_output = self.fake_array(shape=cshape, dtype=ctype) super(GpyDCTIIPlan, self).__init__(in_array=in_array, axes=axes, - fake_input=fake_input, fake_output=fake_output, **kwds) + fake_input=fake_input, fake_output=fake_output, **kwds) def pre_offset_callback(self, N, fp, offset_input_pointer, **kwds): n = (N-1)//2 + 1 pre = \ - ''' + ''' {fp} pre_callback(const __global void* input, uint offset, __global void* userdata) {{ {offset_input_pointer} @@ -1028,14 +1028,14 @@ class GpyDCTIIPlan(GpyR2RPlan): return pre, None def post_offset_callback(self, N, S, fp, offset_output_pointer, - typegen, hardcode_twiddles, **kwds): + typegen, hardcode_twiddles, **kwds): n = (N-1)//2 + 1 (twiddle, twiddles) = self.generate_twiddles('dct2_twiddles', - base=-np.pi/(2*N), count=N//2+1, - fp=fp, typegen=typegen, - hardcode_twiddles=hardcode_twiddles) + base=-np.pi/(2*N), count=N//2+1, + fp=fp, typegen=typegen, + hardcode_twiddles=hardcode_twiddles) post = \ - ''' + ''' {twiddles} void post_callback(__global void* output, const uint offset, __global void* userdata, const {fp}2 R) {{ @@ -1048,8 +1048,8 @@ class GpyDCTIIPlan(GpyR2RPlan): out[{N}-kx] = -2*(R.x*T.y + R.y*T.x)/{S}; }} }}'''.format(N=N, S=S, n=n, fp=fp, - twiddle=twiddle, twiddles=twiddles, - offset_output_pointer=offset_output_pointer) + twiddle=twiddle, twiddles=twiddles, + offset_output_pointer=offset_output_pointer) return post, None @@ -1058,26 +1058,26 @@ class GpyDCTIIIPlan(GpyR2RPlan): (dtype, ctype, shape, axis, N) = self.prepare_r2r(in_array, axes) rshape = mk_shape(shape, axis, N) cshape = mk_shape(shape, axis, N//2+1) - fake_input = self.fake_array(shape=cshape, dtype=ctype) + fake_input = self.fake_array(shape=cshape, dtype=ctype) fake_output = self.fake_array(shape=rshape, dtype=dtype) super(GpyDCTIIIPlan, self).__init__(in_array=in_array, axes=axes, - fake_input=fake_input, fake_output=fake_output, **kwds) + fake_input=fake_input, fake_output=fake_output, **kwds) def pre_offset_callback(self, **kwds): - msg='pre_offset_callback_C2R should be used instead.' + msg = 'pre_offset_callback_C2R should be used instead.' raise NotImplementedError(msg) def pre_offset_callback_C2R(self, N, S, fp, typegen, - offset_input_pointer, hardcode_twiddles, **kwds): + offset_input_pointer, hardcode_twiddles, **kwds): (twiddle, twiddles) = self.generate_twiddles('dct3_twiddles', - base=+np.pi/(2*N), count=N//2+1, - fp=fp, typegen=typegen, - hardcode_twiddles=hardcode_twiddles) + base=+np.pi/(2*N), count=N//2+1, + fp=fp, typegen=typegen, + hardcode_twiddles=hardcode_twiddles) force_real_input = '(kx==0)' - if (N%2==0): # Nyquist freq + if (N % 2 == 0): # Nyquist freq force_real_input += '|| (kx=={n})'.format(n=N//2) pre = \ - ''' + ''' {twiddles} {fp}2 pre_callback(const __global void* input, const uint offset, __global void* userdata) {{ @@ -1100,16 +1100,16 @@ class GpyDCTIIIPlan(GpyR2RPlan): }} return C; }}'''.format(N=N, fp=fp, - offset_input_pointer=offset_input_pointer, - twiddle=twiddle, twiddles=twiddles, - force_real_input=force_real_input) + offset_input_pointer=offset_input_pointer, + twiddle=twiddle, twiddles=twiddles, + force_real_input=force_real_input) return pre, None def post_offset_callback(self, N, S, fp, - offset_output_pointer, **kwds): + offset_output_pointer, **kwds): n = (N-1)//2 + 1 post = \ - ''' + ''' void post_callback(__global void* output, const uint offset, __global void* userdata, const {fp} R) {{ {offset_output_pointer} @@ -1120,7 +1120,7 @@ class GpyDCTIIIPlan(GpyR2RPlan): out[2*({N}-kx)-1] = R/{S}; }} }}'''.format(N=N, S=S, n=n, fp=fp, - offset_output_pointer=offset_output_pointer) + offset_output_pointer=offset_output_pointer) return post, None @@ -1130,14 +1130,14 @@ class GpyDSTIPlan(GpyR2RPlan): (dtype, ctype, shape, axis, N) = self.prepare_r2r(in_array, axes) rshape = mk_shape(shape, axis, 2*N+2) cshape = mk_shape(shape, axis, N+2) - fake_input = self.fake_array(shape=rshape, dtype=dtype) + fake_input = self.fake_array(shape=rshape, dtype=dtype) fake_output = self.fake_array(shape=cshape, dtype=ctype) super(GpyDSTIPlan, self).__init__(in_array=in_array, axes=axes, - fake_input=fake_input, fake_output=fake_output, **kwds) + fake_input=fake_input, fake_output=fake_output, **kwds) def pre_offset_callback(self, N, fp, offset_input_pointer, **kwds): pre = \ - '''{fp} pre_callback(const __global void* input, const uint offset, + '''{fp} pre_callback(const __global void* input, const uint offset, __global void* userdata) {{ {offset_input_pointer} {fp} ret; @@ -1156,7 +1156,7 @@ class GpyDSTIPlan(GpyR2RPlan): def post_offset_callback(self, fp, N, S, offset_output_pointer, **kwds): post = \ - '''void post_callback(__global void* output, const uint offset, + '''void post_callback(__global void* output, const uint offset, __global void* userdata, const {fp}2 R) {{ {offset_output_pointer} if ((kx!=0) && (kx!={N}+1)) {{ @@ -1171,15 +1171,15 @@ class GpyDSTIIPlan(GpyR2RPlan): (dtype, ctype, shape, axis, N) = self.prepare_r2r(in_array, axes) rshape = mk_shape(shape, axis, N) cshape = mk_shape(shape, axis, N//2+1) - fake_input = self.fake_array(shape=rshape, dtype=dtype) + fake_input = self.fake_array(shape=rshape, dtype=dtype) fake_output = self.fake_array(shape=cshape, dtype=ctype) super(GpyDSTIIPlan, self).__init__(in_array=in_array, axes=axes, - fake_input=fake_input, fake_output=fake_output, **kwds) + fake_input=fake_input, fake_output=fake_output, **kwds) def pre_offset_callback(self, N, fp, offset_input_pointer, **kwds): n = (N-1)//2 + 1 pre = \ - ''' + ''' {fp} pre_callback(const __global void* input, uint offset, __global void* userdata) {{ {offset_input_pointer} @@ -1195,14 +1195,14 @@ class GpyDSTIIPlan(GpyR2RPlan): return pre, None def post_offset_callback(self, N, S, fp, offset_output_pointer, - typegen, hardcode_twiddles, **kwds): + typegen, hardcode_twiddles, **kwds): n = (N-1)//2 + 1 (twiddle, twiddles) = self.generate_twiddles('dst2_twiddles', - base=-np.pi/(2*N), count=N//2+1, - fp=fp, typegen=typegen, - hardcode_twiddles=hardcode_twiddles) + base=-np.pi/(2*N), count=N//2+1, + fp=fp, typegen=typegen, + hardcode_twiddles=hardcode_twiddles) post = \ - ''' + ''' {twiddles} void post_callback(__global void* output, const uint offset, __global void* userdata, const {fp}2 R) {{ @@ -1215,8 +1215,8 @@ class GpyDSTIIPlan(GpyR2RPlan): out[{N}-kx-1] = +2*(R.x*T.x - R.y*T.y)/{S}; }} }}'''.format(N=N, S=S, n=n, fp=fp, - twiddle=twiddle, twiddles=twiddles, - offset_output_pointer=offset_output_pointer) + twiddle=twiddle, twiddles=twiddles, + offset_output_pointer=offset_output_pointer) return post, None @@ -1225,26 +1225,26 @@ class GpyDSTIIIPlan(GpyR2RPlan): (dtype, ctype, shape, axis, N) = self.prepare_r2r(in_array, axes) rshape = mk_shape(shape, axis, N) cshape = mk_shape(shape, axis, N//2+1) - fake_input = self.fake_array(shape=cshape, dtype=ctype) + fake_input = self.fake_array(shape=cshape, dtype=ctype) fake_output = self.fake_array(shape=rshape, dtype=dtype) super(GpyDSTIIIPlan, self).__init__(in_array=in_array, axes=axes, - fake_input=fake_input, fake_output=fake_output, **kwds) + fake_input=fake_input, fake_output=fake_output, **kwds) def pre_offset_callback(self, **kwds): - msg='pre_offset_callback_C2R should be used instead.' + msg = 'pre_offset_callback_C2R should be used instead.' raise NotImplementedError(msg) def pre_offset_callback_C2R(self, N, S, fp, typegen, - offset_input_pointer, hardcode_twiddles, **kwds): + offset_input_pointer, hardcode_twiddles, **kwds): (twiddle, twiddles) = self.generate_twiddles('dst3_twiddles', - base=+np.pi/(2*N), count=N//2+1, - fp=fp, typegen=typegen, - hardcode_twiddles=hardcode_twiddles) + base=+np.pi/(2*N), count=N//2+1, + fp=fp, typegen=typegen, + hardcode_twiddles=hardcode_twiddles) force_real_input = '(kx==0)' - if (N%2==0): # Nyquist freq + if (N % 2 == 0): # Nyquist freq force_real_input += '|| (kx=={n})'.format(n=N//2) pre = \ - ''' + ''' {twiddles} {fp}2 pre_callback(const __global void* input, const uint offset, __global void* userdata) {{ @@ -1267,16 +1267,16 @@ class GpyDSTIIIPlan(GpyR2RPlan): }} return C; }}'''.format(N=N, fp=fp, - offset_input_pointer=offset_input_pointer, - twiddle=twiddle, twiddles=twiddles, - force_real_input=force_real_input) + offset_input_pointer=offset_input_pointer, + twiddle=twiddle, twiddles=twiddles, + force_real_input=force_real_input) return pre, None def post_offset_callback(self, N, S, fp, - offset_output_pointer, **kwds): + offset_output_pointer, **kwds): n = (N-1)//2 + 1 post = \ - ''' + ''' void post_callback(__global void* output, const uint offset, __global void* userdata, const {fp} R) {{ {offset_output_pointer} @@ -1287,7 +1287,7 @@ class GpyDSTIIIPlan(GpyR2RPlan): out[2*({N}-kx)-1] = -R/{S}; }} }}'''.format(N=N, S=S, n=n, fp=fp, - offset_output_pointer=offset_output_pointer) + offset_output_pointer=offset_output_pointer) return post, None @@ -1333,21 +1333,21 @@ class GpyFFT(OpenClFFTI): """ def __init__(self, cl_env, - backend=None, allocator=None, - warn_on_allocation=True, - warn_on_unaligned_output_offset=True, - error_on_allocation=False, - **kwds): + backend=None, allocator=None, + warn_on_allocation=True, + warn_on_unaligned_output_offset=True, + error_on_allocation=False, + **kwds): super(GpyFFT, self).__init__(cl_env=cl_env, - backend=backend, allocator=allocator, - warn_on_allocation=warn_on_allocation, - error_on_allocation=error_on_allocation, **kwds) + backend=backend, allocator=allocator, + warn_on_allocation=warn_on_allocation, + error_on_allocation=error_on_allocation, **kwds) self.supported_ftypes = (np.float32, np.float64) self.supported_ctypes = (np.complex64, np.complex128) - self.supported_cosine_transforms = (1,2,3) - self.supported_sine_transforms = (1,2,3) + self.supported_cosine_transforms = (1, 2, 3) + self.supported_sine_transforms = (1, 2, 3) self.warn_on_unaligned_output_offset = warn_on_unaligned_output_offset def allocate_output(self, out, shape, dtype): @@ -1355,8 +1355,8 @@ class GpyFFT(OpenClFFTI): if (out is None): if self.warn_on_allocation or self.error_on_allocation: nbytes = prod(shape)*dtype.itemsize - msg='GpyFFT: allocating output array of size {}.' - msg=msg.format(bytes2str(nbytes)) + msg = 'GpyFFT: allocating output array of size {}.' + msg = msg.format(bytes2str(nbytes)) if self.error_on_allocation: raise RuntimeError(msg) else: @@ -1369,23 +1369,23 @@ class GpyFFT(OpenClFFTI): def bake_kwds(self, **kwds): plan_kwds = {} - plan_kwds['in_array'] = kwds.pop('a') - plan_kwds['out_array'] = kwds.pop('out') - plan_kwds['scaling'] = kwds.pop('scaling', None) - plan_kwds['scale_by_size'] = kwds.pop('scale_by_size', None) - plan_kwds['axes'] = kwds.pop('axes', (kwds.pop('axis'),)) - plan_kwds['cl_env'] = kwds.pop('cl_env', self.cl_env) - plan_kwds['queue'] = kwds.pop('queue', self.queue) - plan_kwds['verbose'] = kwds.pop('verbose', __VERBOSE__) - plan_kwds['warn_on_allocation'] = kwds.pop('warn_on_allocation', self.warn_on_allocation) + plan_kwds['in_array'] = kwds.pop('a') + plan_kwds['out_array'] = kwds.pop('out') + plan_kwds['scaling'] = kwds.pop('scaling', None) + plan_kwds['scale_by_size'] = kwds.pop('scale_by_size', None) + plan_kwds['axes'] = kwds.pop('axes', (kwds.pop('axis'),)) + plan_kwds['cl_env'] = kwds.pop('cl_env', self.cl_env) + plan_kwds['queue'] = kwds.pop('queue', self.queue) + plan_kwds['verbose'] = kwds.pop('verbose', __VERBOSE__) + plan_kwds['warn_on_allocation'] = kwds.pop('warn_on_allocation', self.warn_on_allocation) plan_kwds['error_on_allocation'] = kwds.pop('error_on_allocation', self.error_on_allocation) plan_kwds['warn_on_unaligned_output_offset'] = \ - kwds.pop('warn_on_unaligned_output_offset', - self.warn_on_unaligned_output_offset) + kwds.pop('warn_on_unaligned_output_offset', + self.warn_on_unaligned_output_offset) if kwds: - msg='Unknown keyword arguments: {}' - msg=msg.format(', '.join('\'{}\''.format(kwd) for kwd in kwds.keys())) + msg = 'Unknown keyword arguments: {}' + msg = msg.format(', '.join('\'{}\''.format(kwd) for kwd in kwds.keys())) raise RuntimeError(msg) return plan_kwds @@ -1422,14 +1422,14 @@ class GpyFFT(OpenClFFTI): (shape, dtype) = super(GpyFFT, self).dct(a=a, out=out, type=type, axis=axis, **kwds) out = self.allocate_output(out, shape, dtype) kwds = self.bake_kwds(a=a, out=out, axis=axis, **kwds) - if type==1: + if type == 1: plan = GpyDCTIPlan(**kwds) - elif type==2: + elif type == 2: plan = GpyDCTIIPlan(**kwds) - elif type==3: + elif type == 3: plan = GpyDCTIIIPlan(**kwds) else: - msg='Unimplemented cosine transform type {}'.format(itype) + msg = 'Unimplemented cosine transform type {}'.format(type) raise RuntimeError(msg) return plan @@ -1437,24 +1437,23 @@ class GpyFFT(OpenClFFTI): (shape, dtype) = super(GpyFFT, self).dst(a=a, out=out, type=type, axis=axis, **kwds) out = self.allocate_output(out, shape, dtype) kwds = self.bake_kwds(a=a, out=out, axis=axis, **kwds) - if type==1: + if type == 1: plan = GpyDSTIPlan(**kwds) - elif type==2: + elif type == 2: plan = GpyDSTIIPlan(**kwds) - elif type==3: + elif type == 3: plan = GpyDSTIIIPlan(**kwds) else: - msg='Unimplemented sine transform type {}'.format(itype) + msg = 'Unimplemented sine transform type {}'.format(type) raise RuntimeError(msg) return plan def idct(self, a, out=None, type=2, axis=-1, **kwds): (shape, dtype, itype, s) = super(GpyFFT, self).idct(a=a, out=out, type=type, - axis=axis, **kwds) + axis=axis, **kwds) return self.dct(a=a, out=out, type=itype, axis=axis, scale_by_size=s, **kwds) def idst(self, a, out=None, type=2, axis=-1, **kwds): (shape, dtype, itype, s) = super(GpyFFT, self).idst(a=a, out=out, type=type, - axis=axis, **kwds) + axis=axis, **kwds) return self.dst(a=a, out=out, type=itype, axis=axis, scale_by_size=s, **kwds) - diff --git a/hysop/numerics/odesolvers/runge_kutta.py b/hysop/numerics/odesolvers/runge_kutta.py index cefb09bbc..76355c18c 100644 --- a/hysop/numerics/odesolvers/runge_kutta.py +++ b/hysop/numerics/odesolvers/runge_kutta.py @@ -2,18 +2,24 @@ import numpy as np from hysop.tools.types import check_instance, first_not_None from hysop.tools.numerics import is_fp -from hysop.numerics.odesolvers.runge_kutta_coeffs import Itype,Qtype,rk_params, available_methods +from hysop.numerics.odesolvers.runge_kutta_coeffs import Itype, Qtype, rk_params, available_methods # default methods to dump rational and integers expression to string + + def Q_dump(q): - return '({}/{})'.format(q.numerator,q.denominator) + return '({}/{})'.format(q.numerator, q.denominator) + + def I_dump(i): - return '{}'.format(z) + return '{}'.format(i) + class TimeIntegrator(object): def __str__(self): return self.name() + class RungeKutta(TimeIntegrator): pass @@ -21,30 +27,32 @@ class RungeKutta(TimeIntegrator): # Xni = Xn + dt*sum(j=0,i-1,gamma_ij*Knj) # Kni = F(Tni,Xni) # X_n+1 = Xn + dt*sum(i=0,M-1,beta_i*Ki) + + class RhsFunction(object): - def __call__(out, X, t, dt, **kwds): - msg='{}.__call__() has not been overrided.' - msg=msg.format(type(self).__name__) + def __call__(self, out, X, t, dt, **kwds): + msg = '{}.__call__() has not been overrided.' + msg = msg.format(type(self).__name__) raise NotImplementedError(msg) class ExplicitRungeKutta(RungeKutta): - def __init__(self,method,I_dump=I_dump,Q_dump=Q_dump): + def __init__(self, method, I_dump=I_dump, Q_dump=Q_dump): if method not in available_methods(): - msg='{} was not implemented yet! Valid values are {}.' - msg=msg.format(name,implemented_methods.keys()) + msg = '{} was not implemented yet! Valid values are {}.' + msg = msg.format(method, available_methods.keys()) raise ValueError(msg) params = rk_params[method] self.method = method - self.order = params['order'] + self.order = params['order'] self.stages = params['stages'] self.alpha = params['alpha'] - self.beta = params['beta'] + self.beta = params['beta'] self.gamma = params['gamma'] self.I_dump = I_dump @@ -56,13 +64,13 @@ class ExplicitRungeKutta(RungeKutta): """Buffers = dict of (nb_stages+1) np.ndarray of size compatible with Xin""" check_instance(Xin, dict, keys=str, values=np.ndarray) varnames = Xin.keys() - views = first_not_None(views, { k: Ellipsis for (k,v) in Xin.items() }) - Xout = first_not_None(Xout, { k: np.empty_like(v[views[k]]) - for (k,v) in Xin.items() }) - buffers = first_not_None(buffers, { k: tuple(np.empty_like(v) - for i in range(self.stages+1)) - for (k,v) in Xin.items()}) - check_instance(views, dict, keys=str, values=(type(Ellipsis),slice,tuple)) + views = first_not_None(views, {k: Ellipsis for (k, v) in Xin.items()}) + Xout = first_not_None(Xout, {k: np.empty_like(v[views[k]]) + for (k, v) in Xin.items()}) + buffers = first_not_None(buffers, {k: tuple(np.empty_like(v) + for i in range(self.stages+1)) + for (k, v) in Xin.items()}) + check_instance(views, dict, keys=str, values=(type(Ellipsis), slice, tuple)) check_instance(Xout, dict, keys=str, values=np.ndarray) check_instance(buffers, dict, keys=str, values=tuple) assert callable(RHS), type(RHS) @@ -78,20 +86,20 @@ class ExplicitRungeKutta(RungeKutta): assert is_fp(ivar.dtype), ivar.dtype assert is_fp(ovar.dtype), ovar.dtype assert np.all(ivar[view].shape == compute_shape), \ - '{} vs {}'.format(ivar[view].shape, compute_shape) + '{} vs {}'.format(ivar[view].shape, compute_shape) assert np.all(ovar.shape == compute_shape), ovar.shape - assert len(buffers[vname])==self.stages+1, self.stages+1 + assert len(buffers[vname]) == self.stages+1, self.stages+1 for buf in buffers[vname]: assert is_fp(buf.dtype), buf.dtype assert np.all(buf.shape == ivar.shape) - Xtmp = {k: v[0] for (k,v) in buffers.items()} - K = tuple( {k: v[i] for (k,v) in buffers.items()} - for i in range(1, self.stages+1) ) + Xtmp = {k: v[0] for (k, v) in buffers.items()} + K = tuple({k: v[i] for (k, v) in buffers.items()} + for i in range(1, self.stages+1)) for i in range(self.stages): ai = self.alpha[i] ti = t + float(ai)*dt - if (i==0): + if (i == 0): RHS(out=K[i], X=Xin, t=ti, step=i, steps=self.stages, **kwds) else: for vname in varnames: @@ -113,13 +121,13 @@ class ExplicitRungeKutta(RungeKutta): def __eq__(self, other): if not isinstance(other, ExplicitRungeKutta): return NotImplemented - eq = (self.method == other.method) + eq = (self.method == other.method) #eq &= self.I_dump == other.I_dump #eq &= self.Q_dump == other.Q_dump return eq def __ne__(self, other): - eq = (self==other) + eq = (self == other) if isinstance(eq, ExplicitRungeKutta): return NotImplemented return not eq @@ -130,28 +138,28 @@ class ExplicitRungeKutta(RungeKutta): def name(self): return self.method - def dump(self,val): - if isinstance(val,Itype): + def dump(self, val): + if isinstance(val, Itype): return self.I_dump(val) - elif isinstance(val,Qtype): + elif isinstance(val, Qtype): return self.Q_dump(val) else: return '{}'.format(val) # Tni = Tn + alpha_i*dt - def Tni(self,i,Tn,dt): + def Tni(self, i, Tn, dt): alpha = self.alpha[i] if alpha == 0: return '{}'.format(Tn) elif alpha == 1: - return '{} + {}'.format(Tn,dt) + return '{} + {}'.format(Tn, dt) else: - return '{} + {}*{}'.format(Tn,self.dump(alpha),dt) + return '{} + {}*{}'.format(Tn, self.dump(alpha), dt) # Xni = Xn + dt*sum(j=0,i-1,gamma_ij*Knj) - def Xni_sum(self,i): + def Xni_sum(self, i): _sum = '' - gamma = self.gamma[i-1,:] + gamma = self.gamma[i-1, :] for j in range(i): g = gamma[j] if g == 0: @@ -159,14 +167,15 @@ class ExplicitRungeKutta(RungeKutta): elif g == 1: _sum += 'K[{}]'.format(j) else: - _sum += '{}*K[{}]'.format(self.dump(g),j) + _sum += '{}*K[{}]'.format(self.dump(g), j) _sum += ' + ' _sum = _sum[:-3] return _sum - def Xni(self, i,Xn,dt): - if i>0: + + def Xni(self, i, Xn, dt): + if i > 0: _sum = self.Xni_sum(i) - return '{} + {}*({})'.format(Xn,dt,_sum) + return '{} + {}*({})'.format(Xn, dt, _sum) else: return '{}'.format(Xn) @@ -180,35 +189,37 @@ class ExplicitRungeKutta(RungeKutta): elif beta == 1: _sum += 'K[{}]'.format(i) else: - _sum += '{}*K[{}]'.format(self.dump(beta),i) + _sum += '{}*K[{}]'.format(self.dump(beta), i) _sum += ' + ' _sum = _sum[:-3] return _sum - def step(self,Xn,dt): - return '{} + {}*({})'.format(Xn,dt,self.step_sum()) + def step(self, Xn, dt): + return '{} + {}*({})'.format(Xn, dt, self.step_sum()) -Euler = ExplicitRungeKutta('Euler') -RK1 = ExplicitRungeKutta('RK1') -RK2 = ExplicitRungeKutta('RK2') -RK3 = ExplicitRungeKutta('RK3') -RK4 = ExplicitRungeKutta('RK4') + +Euler = ExplicitRungeKutta('Euler') +RK1 = ExplicitRungeKutta('RK1') +RK2 = ExplicitRungeKutta('RK2') +RK3 = ExplicitRungeKutta('RK3') +RK4 = ExplicitRungeKutta('RK4') RK4_38 = ExplicitRungeKutta('RK4_38') if __name__ == '__main__': R = ExplicitRungeKutta('RK4') for i in range(4): - print(R.Tni(i,Tn='To',dt='dt')) + print(R.Tni(i, Tn='To', dt='dt')) print() for i in range(4): - print(R.Xni(i,Xn='Xo',dt='dt')) + print(R.Xni(i, Xn='Xo', dt='dt')) print() - print(R.step(Xn='Xo',dt='dt')) + print(R.step(Xn='Xo', dt='dt')) - Xin = {'a': np.random.rand(10,10).astype(np.float32)} + Xin = {'a': np.random.rand(10, 10).astype(np.float32)} Xout = {'a': np.empty_like(Xin['a'])} print('\nRHS=0') + class Rhs(RhsFunction): def __call__(self, out, X, t, dt, **kwds): out['a'][...] = 0 @@ -219,9 +230,10 @@ if __name__ == '__main__': Integrator(Xin, rhs, dt=dt, Xout=Xout) d = np.max(np.abs((Xout['a']-Xin['a']))) print(' d={}'.format(d)) - assert (d<1e-7), d + assert (d < 1e-7), d print('\nRHS=1') + def rhs(out, X, t, dt, **kwds): out['a'][...] = 1 for Integrator in (Euler, RK2, RK3, RK4, RK4_38): @@ -230,9 +242,10 @@ if __name__ == '__main__': Integrator(Xin, rhs, dt=dt, Xout=Xout) d = np.max(np.abs((Xout['a']-Xin['a'])-dt)) print(' d={}'.format(d)) - assert (d<1e-7), d + assert (d < 1e-7), d print('\nRHS=cos(t)') + def rhs(out, X, t, dt, **kwds): out['a'][...] = np.cos(t) for Integrator in (Euler, RK2, RK3, RK4, RK4_38): @@ -241,13 +254,14 @@ if __name__ == '__main__': Integrator(Xin, rhs, dt=dt, Xout=Xout) alpha = np.asarray(tuple(float(x) for x in Integrator.alpha), dtype=np.float32) - beta = np.asarray(tuple(float(x) for x in Integrator.beta), dtype=np.float32) + beta = np.asarray(tuple(float(x) for x in Integrator.beta), dtype=np.float32) dX = np.dot(np.cos(alpha*dt), beta) d = np.max(np.abs((Xout['a']-Xin['a'])-dX*dt)) print(' d={}'.format(d)) - assert (d<1e-7), d + assert (d < 1e-7), d print('\nRHS=f(x)') + def rhs(out, X, t, dt, **kwds): out['a'][...] = 0.01*X['a'] + 0.02 for Integrator in (Euler, RK2, RK3, RK4, RK4_38): @@ -256,28 +270,29 @@ if __name__ == '__main__': Integrator(Xin, rhs, dt=dt, Xout=Xout) alpha = np.asarray(tuple(float(x) for x in Integrator.alpha), dtype=np.float32) - beta = np.asarray(tuple(float(x) for x in Integrator.beta), dtype=np.float32) - K = [None,]*Integrator.stages + beta = np.asarray(tuple(float(x) for x in Integrator.beta), dtype=np.float32) + K = [None, ]*Integrator.stages beta = np.asarray(tuple(float(x) for x in Integrator.beta), dtype=np.float32) for i in range(Integrator.stages): - if i==0: + if i == 0: X = Xin['a'].copy() else: X[...] = 0 for j in range(i): - gij = float(Integrator.gamma[i-1,j]) + gij = float(Integrator.gamma[i-1, j]) X[...] += float(gij)*K[j] X[...] *= dt X[...] += Xin['a'] K[i] = 0.01*X + 0.02 dX = np.zeros_like(X) - for i,bi in enumerate(beta): + for i, bi in enumerate(beta): dX += bi*K[i] d = np.max(np.abs((Xout['a']-Xin['a'])-dX*dt)) print(' d={}'.format(d)) - assert (d<1e-7), d + assert (d < 1e-7), d print('\nRHS=f(x, t)') + def rhs(out, X, t, dt, **kwds): out['a'][...] = 0.01*X['a'] + 2*t for Integrator in (Euler, RK2, RK3, RK4, RK4_38): @@ -286,25 +301,25 @@ if __name__ == '__main__': Integrator(Xin, rhs, dt=dt, Xout=Xout) alpha = np.asarray(tuple(float(x) for x in Integrator.alpha), dtype=np.float32) - beta = np.asarray(tuple(float(x) for x in Integrator.beta), dtype=np.float32) - K = [None,]*Integrator.stages + beta = np.asarray(tuple(float(x) for x in Integrator.beta), dtype=np.float32) + K = [None, ]*Integrator.stages alpha0 = np.asarray(tuple(float(x) for x in Integrator.alpha), dtype=np.float32) beta = np.asarray(tuple(float(x) for x in Integrator.beta), dtype=np.float32) for i in range(Integrator.stages): ti = 0+alpha[i]*dt - if i==0: + if i == 0: X = Xin['a'].copy() else: X[...] = 0 for j in range(i): - gij = float(Integrator.gamma[i-1,j]) + gij = float(Integrator.gamma[i-1, j]) X[...] += float(gij)*K[j] X[...] *= dt X[...] += Xin['a'] K[i] = 0.01*X + 2*ti dX = np.zeros_like(X) - for i,bi in enumerate(beta): + for i, bi in enumerate(beta): dX += bi*K[i] d = np.max(np.abs((Xout['a']-Xin['a'])-dX*dt)) print(' d={}'.format(d)) - assert (d<1e-7), d + assert (d < 1e-7), d diff --git a/hysop/numerics/remesh/kernel_generator.py b/hysop/numerics/remesh/kernel_generator.py index 7333e96b2..a239e09e9 100644 --- a/hysop/numerics/remesh/kernel_generator.py +++ b/hysop/numerics/remesh/kernel_generator.py @@ -1,12 +1,16 @@ -import os, hashlib, gzip +import os +import hashlib +import gzip import numpy as np import scipy as sp import sympy as sm from scipy import interpolate -from hysop.tools.io_utils import IO -from hysop.tools.numerics import mpq,mpfr,mpqize,f2q -from hysop.tools.cache import load_data_from_cache, update_cache +from hysop.tools.io_utils import IO +from hysop.tools.numerics import mpq, mpfr, mpqize, f2q +from hysop.tools.cache import load_data_from_cache, update_cache +from hysop.numerics.odesolvers.runge_kutta import ExplicitRungeKutta + class Kernel(object): def __init__(self, register=False, verbose=False, split_polys=False, **kargs): @@ -18,9 +22,9 @@ class Kernel(object): """ dic = kargs - varnames = ['n','r','deg','Ms','Mh','H','remesh','P'] + varnames = ['n', 'r', 'deg', 'Ms', 'Mh', 'H', 'remesh', 'P'] for var in varnames: - setattr(self,var,dic[var]) + setattr(self, var, dic[var]) self.split_polys = split_polys self.varnames = varnames @@ -31,13 +35,13 @@ class Kernel(object): self._hashable = True @classmethod - def hash_kernel_key(cls,n,r,deg,Ms,H,remesh): - s = '{}_{}_{}_{}_{}_{}'.format(n,r,deg,Ms,H,int(remesh)) + def hash_kernel_key(cls, n, r, deg, Ms, H, remesh): + s = '{}_{}_{}_{}_{}_{}'.format(n, r, deg, Ms, H, int(remesh)) return hashlib.sha256(s.encode('utf-8')).hexdigest() @classmethod def cache_file(cls): - _cache_dir = IO.cache_path() + '/numerics' + _cache_dir = IO.cache_path() + '/numerics' _cache_file = _cache_dir + '/remesh.pklz' return _cache_file @@ -50,22 +54,22 @@ class Kernel(object): return eq def __ne__(self, other): - eq = (self==other) + eq = (self == other) if isinstance(eq, ExplicitRungeKutta): return NotImplemented return not eq def __hash__(self): - h = self.hash_kernel_key(self.n,self.r,self.deg,self.Ms,self.H,self.remesh) - return hash((h,self.split_polys)) + h = self.hash_kernel_key(self.n, self.r, self.deg, self.Ms, self.H, self.remesh) + return hash((h, self.split_polys)) - def _build(self,verbose,split_polys): + def _build(self, verbose, split_polys): - #polynom symbolic variables + # polynom symbolic variables x = sm.Symbol('x') t = sm.Symbol('t') - #usefull vars + # usefull vars Ms = self.Ms deg = self.deg P = self.P @@ -74,32 +78,32 @@ class Kernel(object): if verbose: print(' => Substitution in Polynomials') for Pix in P: - print(' ',Pix.all_coeffs()[::-1]) + print(' ', Pix.all_coeffs()[::-1]) print() for Pix in P: - print(' ',sm.horner(Pix)) + print(' ', sm.horner(Pix)) print() - #split polynomials + # split polynomials X = np.arange(-Ms, +Ms+1) if split_polys: Pt_l = [] Pt_r = [] Pt_L = [] Pt_R = [] - Cl = np.empty(shape=(deg+1,2*Ms), dtype=np.float64) - Cr = np.empty(shape=(deg+1,2*Ms), dtype=np.float64) - CL = np.empty(shape=(deg+1,2*Ms), dtype=np.float64) - CR = np.empty(shape=(deg+1,2*Ms), dtype=np.float64) - for i,Pix in enumerate(P): - Pit_l = sm.polys.polytools.poly(Pix.as_expr().xreplace({x:t+X[i]}), t, domain='QQ') - Pit_r = sm.polys.polytools.poly(Pix.as_expr().xreplace({x:X[i]+1-t}), t, domain='QQ') - Pit_L = sm.polys.polytools.poly(Pix.as_expr().xreplace({x:+1-t+X[i]}), t, domain='QQ') - Pit_R = sm.polys.polytools.poly(Pix.as_expr().xreplace({x:X[i]-t}), t, domain='QQ') - Cl[:,i] = np.asarray(Pit_l.all_coeffs(), dtype=np.float64) - Cr[:,i] = np.asarray(Pit_r.all_coeffs(), dtype=np.float64) - CL[:,i] = np.asarray(Pit_L.all_coeffs(), dtype=np.float64) - CR[:,i] = np.asarray(Pit_R.all_coeffs(), dtype=np.float64) + Cl = np.empty(shape=(deg+1, 2*Ms), dtype=np.float64) + Cr = np.empty(shape=(deg+1, 2*Ms), dtype=np.float64) + CL = np.empty(shape=(deg+1, 2*Ms), dtype=np.float64) + CR = np.empty(shape=(deg+1, 2*Ms), dtype=np.float64) + for i, Pix in enumerate(P): + Pit_l = sm.polys.polytools.poly(Pix.as_expr().xreplace({x: t+X[i]}), t, domain='QQ') + Pit_r = sm.polys.polytools.poly(Pix.as_expr().xreplace({x: X[i]+1-t}), t, domain='QQ') + Pit_L = sm.polys.polytools.poly(Pix.as_expr().xreplace({x: +1-t+X[i]}), t, domain='QQ') + Pit_R = sm.polys.polytools.poly(Pix.as_expr().xreplace({x: X[i]-t}), t, domain='QQ') + Cl[:, i] = np.asarray(Pit_l.all_coeffs(), dtype=np.float64) + Cr[:, i] = np.asarray(Pit_r.all_coeffs(), dtype=np.float64) + CL[:, i] = np.asarray(Pit_L.all_coeffs(), dtype=np.float64) + CR[:, i] = np.asarray(Pit_R.all_coeffs(), dtype=np.float64) Pt_l.append(Pit_l) Pt_r.append(Pit_r) Pt_L.append(Pit_L) @@ -116,27 +120,27 @@ class Kernel(object): if verbose: print(' => Splitting polynomials') for p in Pt_l: - print(' ',p.all_coeffs()) + print(' ', p.all_coeffs()) print() for p in Pt_r: - print(' ',p.all_coeffs()) + print(' ', p.all_coeffs()) print() print() for p in Pt_l: - print(' ',sm.horner(p)) + print(' ', sm.horner(p)) print() for p in Pt_r: - print(' ',sm.horner(p)) + print(' ', sm.horner(p)) else: - Pt = [] + Pt = [] Pt_L = [] - C = np.empty(shape=(deg+1,2*Ms), dtype=np.float64) - CL = np.empty(shape=(deg+1,2*Ms), dtype=np.float64) - for i,Pix in enumerate(P): - Pit = sm.polys.polytools.poly(Pix.as_expr().xreplace({x:t+X[i]}), t, domain='QQ') - Pit_L = sm.polys.polytools.poly(Pix.as_expr().xreplace({x:1-t+X[i]}), t, domain='QQ') - C[:,i] = np.asarray(Pit.all_coeffs(), dtype=np.float64) - CL[:,i] = np.asarray(Pit_L.all_coeffs(), dtype=np.float64) + C = np.empty(shape=(deg+1, 2*Ms), dtype=np.float64) + CL = np.empty(shape=(deg+1, 2*Ms), dtype=np.float64) + for i, Pix in enumerate(P): + Pit = sm.polys.polytools.poly(Pix.as_expr().xreplace({x: t+X[i]}), t, domain='QQ') + Pit_L = sm.polys.polytools.poly(Pix.as_expr().xreplace({x: 1-t+X[i]}), t, domain='QQ') + C[:, i] = np.asarray(Pit.all_coeffs(), dtype=np.float64) + CL[:, i] = np.asarray(Pit_L.all_coeffs(), dtype=np.float64) Pt.append(Pit) Pt_L.append(Pit_L) self.Pt = Pt @@ -147,32 +151,34 @@ class Kernel(object): if verbose: print(' => Substituing x = t+i') for Pit in Pt: - print(' ',Pit.all_coeffs()[::-1]) + print(' ', Pit.all_coeffs()[::-1]) print() for Pit in Pt: - print(' ',sm.horner(Pit)) + print(' ', sm.horner(Pit)) print() if verbose: print(' => Generating lambdas') - imin=-Ms - imax=+Ms + imin = -Ms + imax = +Ms if split_polys: - gamma_l = sp.interpolate.PPoly(Cl,X,extrapolate=False) - gamma_r = sp.interpolate.PPoly(Cr,X,extrapolate=False) + gamma_l = sp.interpolate.PPoly(Cl, X, extrapolate=False) + gamma_r = sp.interpolate.PPoly(Cr, X, extrapolate=False) + def gamma(x): - i = np.floor(x) - z = x-i - res = (z>=0.5)*gamma_r(i+1-z) + (z<0.5)*gamma_l(x) - res[np.isnan(res)] = 0.0 - return res + i = np.floor(x) + z = x-i + res = (z >= 0.5)*gamma_r(i+1-z) + (z < 0.5)*gamma_l(x) + res[np.isnan(res)] = 0.0 + return res else: - gamma_ = sp.interpolate.PPoly(C,X,extrapolate=False) + gamma_ = sp.interpolate.PPoly(C, X, extrapolate=False) + def gamma(x): - res = gamma_(x) - res[np.isnan(res)] = 0.0 - return res + res = gamma_(x) + res[np.isnan(res)] = 0.0 + return res self.I = X self.gamma = gamma @@ -187,16 +193,15 @@ class Kernel(object): print(' All done.') def _register(self, dic): - key = self.hash_kernel_key(self.n,self.r,self.deg,self.Ms,self.H,self.remesh) + key = self.hash_kernel_key(self.n, self.r, self.deg, self.Ms, self.H, self.remesh) update_cache(self.cache_file(), key, dic) - def __call__(self,x): + def __call__(self, x): Ms = self.Ms res = self.gamma(x) return res - class SymmetricKernelGenerator(object): """ Generate a symmetric piecewise polynomial that @@ -206,28 +211,28 @@ class SymmetricKernelGenerator(object): SINGULAR_SYSTEM = {} - def __init__(self,verbose=False): - self.verbose = verbose + def __init__(self, verbose=False): + self.verbose = verbose self.configured = False - def configure(self,n,H=None): - if n==1: # For linear kernel L1_0 - Ms=1 - H = np.zeros(2,dtype=int) + def configure(self, n, H=None): + if n == 1: # For linear kernel L1_0 + Ms = 1 + H = np.zeros(2, dtype=int) H[Ms] = 1 self.remesh = True self.H = H self.Mh = None - self.n = n - self.Ms = Ms + self.n = n + self.Ms = Ms self.configured = True return self - assert n>0 and n%2==0, 'n not even or n<=0.' + assert n > 0 and n % 2 == 0, 'n not even or n<=0.' Ms = n//2+1 if H is None: - H = np.zeros(2*Ms+1,dtype=int) + H = np.zeros(2*Ms+1, dtype=int) H[Ms] = 1 self.remesh = True else: @@ -242,28 +247,27 @@ class SymmetricKernelGenerator(object): Mh = [] for q in range(n+1): mq = mpq(0) - for i,h in enumerate(H,start=-Ms): + for i, h in enumerate(H, start=-Ms): mq += (i**q) * h Mh.append(mq) self.Mh = Mh - self.n = n - self.Ms = Ms + self.n = n + self.Ms = Ms self.configured = True return self - def solve(self, r, override_cache=False, split_polys=False, no_wrap=False): assert self.configured, 'SymmetricKernelGenerator has not been configured.' - assert r>=0, 'r<0.' + assert r >= 0, 'r<0.' deg = 2*r+1 - n = self.n - Ms = self.Ms - H = self.H - Mh = self.Mh + n = self.n + Ms = self.Ms + H = self.H + Mh = self.Mh - remesh = self.remesh + remesh = self.remesh verbose = self.verbose if no_wrap: @@ -283,12 +287,12 @@ class SymmetricKernelGenerator(object): print(' Mh = ['+','.join([m.__str__() for m in Mh])+']') print() - H = mpqize(np.asarray(H)) + H = mpqize(np.asarray(H)) if not self.remesh: Mh = mpqize(np.asarray(Mh)) # check if kernel was not already cached - key = Kernel.hash_kernel_key(n,r,deg,Ms,H,remesh) + key = Kernel.hash_kernel_key(n, r, deg, Ms, H, remesh) if override_cache: if verbose: print(' Cache overwrite requested.') @@ -302,46 +306,46 @@ class SymmetricKernelGenerator(object): print(' Loading kernel from cache.') if (data == self.SINGULAR_SYSTEM): raise RuntimeError('Could not solve linear system.') - kernel = cls(verbose=verbose,register=False,split_polys=split_polys,**data) + kernel = cls(verbose=verbose, register=False, split_polys=split_polys, **data) return kernel elif verbose: print('False') print(' Building linear system...') - #polynom symbolic variable + # polynom symbolic variable x = sm.Symbol('x') - #build Ms*(deg+1) symbolic coefficients (polynomial unknowns) + # build Ms*(deg+1) symbolic coefficients (polynomial unknowns) coeffs = [] for k in range(Ms): - coeffs.append([sm.symbols('C{k}_{d}'.format(k=k,d=d)) for d in range(deg+1)]) - #build discrete moments rhs values + coeffs.append([sm.symbols('C{k}_{d}'.format(k=k, d=d)) for d in range(deg+1)]) + # build discrete moments rhs values M = [] for i in range(n+1): Mi = [] for j in range(deg+1): - if remesh and i==j: + if remesh and i == j: Mij = f2q(1) - elif not remesh and i-j>=0 and Mh[i-j]!=0: - Mij = sm.symbols('M{i}_{j}'.format(i=i,j=j)) + elif not remesh and i-j >= 0 and Mh[i-j] != 0: + Mij = sm.symbols('M{i}_{j}'.format(i=i, j=j)) else: Mij = 0 Mi.append(Mij) M.append(Mi) - #build the Ms polynomials - Pp = [] - Pm = [] - for i,C in enumerate(coeffs): + # build the Ms polynomials + Pp = [] + Pm = [] + for i, C in enumerate(coeffs): pexpr = f2q(0) mexpr = f2q(0) - for d,c in enumerate(C): + for d, c in enumerate(C): pexpr += c*(x**d) mexpr += c*((-x)**d) - Pp.append(sm.polys.polytools.poly(pexpr,x)) - Pm.append(sm.polys.polytools.poly(mexpr,x)) - P = Pm[::-1] + Pp + Pp.append(sm.polys.polytools.poly(pexpr, x)) + Pm.append(sm.polys.polytools.poly(mexpr, x)) + P = Pm[::-1] + Pp - #precompute the r first polynomial derivatives + # precompute the r first polynomial derivatives dPs = [] for p in Pp: dP = [p] @@ -353,52 +357,54 @@ class SymmetricKernelGenerator(object): # Build overdetermined system of equations eqs = [] - ### Continuity equations (Gamma is Cr continuous) + # Continuity equations (Gamma is Cr continuous) # Parity in x=0 -- Gamma is EVEN -> (r+1)//2 eqs - for d in range(1, r+1,2): - eq = coeffs[0][d] # =0 + for d in range(1, r+1, 2): + eq = coeffs[0][d] # =0 eqs.append(eq) # Right-most point, zero derivatives -> (r+1) eqs for d in range(r+1): - eq = dPs[-1][d].xreplace({x:sm.Integer(Ms)}) # =0 + eq = dPs[-1][d].xreplace({x: sm.Integer(Ms)}) # =0 eqs.append(eq.as_expr()) # Cr-continuity on inner points -> (Ms-1)*(r+1) eqs for d in range(r+1): for i in range(Ms-1): - eq = dPs[i][d].xreplace({x:sm.Integer(i+1)}) - dPs[i+1][d].xreplace({x:sm.Integer(i+1)}) # = 0 + eq = dPs[i][d].xreplace({x: sm.Integer(i+1)}) - dPs[i+1][d].xreplace({x: sm.Integer(i+1)}) # = 0 eqs.append(eq.as_expr()) - ### Interpolation condition on the left -> Ms equations + # Interpolation condition on the left -> Ms equations for i in range(Ms): - eq = Pp[i].xreplace({x:sm.Integer(i)}) - H[Ms+i] # = 0 + eq = Pp[i].xreplace({x: sm.Integer(i)}) - H[Ms+i] # = 0 eqs.append(eq.as_expr()) - ### Discrete moments + # Discrete moments s = sm.symbols('s') for m in range(0, n+1): expr = f2q(0) for l in range(-Ms+1, Ms+1): - if m>0 and l==0: continue + if m > 0 and l == 0: + continue i = Ms-l - e = P[i].xreplace({x:s-f2q(l)}).as_expr() - if m>0: e *= f2q(l**m) + e = P[i].xreplace({x: s-f2q(l)}).as_expr() + if m > 0: + e *= f2q(l**m) expr += e - Pm = sm.polys.polytools.poly(expr,s) - for i,Cmi in enumerate(Pm.all_coeffs()[::-1]): + Pm = sm.polys.polytools.poly(expr, s) + for i, Cmi in enumerate(Pm.all_coeffs()[::-1]): eqs.append(Cmi-M[m][i]) if verbose: print(' => System built.') unknowns = [c for cl in coeffs for c in cl]\ - +[m for ml in M for m in ml if isinstance(m,sm.Symbol)] + + [m for ml in M for m in ml if isinstance(m, sm.Symbol)] if verbose: - print(' Unknowns: ',unknowns) + print(' Unknowns: ', unknowns) - sol = sm.solve(eqs,unknowns) - if len(sol)!=len(unknowns): + sol = sm.solve(eqs, unknowns) + if len(sol) != len(unknowns): if verbose: - print('sol=',sol) + print('sol=', sol) update_cache(Kernel.cache_file(), key, self.SINGULAR_SYSTEM) raise RuntimeError('Could not solve linear system.') elif verbose: @@ -406,19 +412,20 @@ class SymmetricKernelGenerator(object): for k in sorted(sol.keys(), key=lambda x: x.name): print(' {}: {}'.format(k, sol[k])) - for i,Pix in enumerate(P): + for i, Pix in enumerate(P): P[i] = Pix.xreplace(sol) - kernel = cls(n=n,r=r,deg=deg,Ms=Ms, - H=H, Mh=Mh, remesh=remesh, - P=P, register=True, verbose=verbose, - split_polys=split_polys) + kernel = cls(n=n, r=r, deg=deg, Ms=Ms, + H=H, Mh=Mh, remesh=remesh, + P=P, register=True, verbose=verbose, + split_polys=split_polys) return kernel -if __name__=='__main__': + +if __name__ == '__main__': from hysop.numerics.stencil.stencil_generator import StencilGenerator from matplotlib import pyplot as plt - verbose=True + verbose = True sg = StencilGenerator() sg.configure(dim=1, derivative=2) @@ -431,29 +438,29 @@ if __name__=='__main__': H = [0] + H.coeffs.tolist() + [0] kg = SymmetricKernelGenerator(verbose).configure(p, H=H) kernels = [] - for r in [1,2,4,8]: + for r in [1, 2, 4, 8]: try: - kernels.append(kg.solve(r,override_cache=False)) + kernels.append(kg.solve(r, override_cache=False)) except RuntimeError: - print('Solver failed fro p={} and r={}.'.format(p,r)) + print('Solver failed fro p={} and r={}.'.format(p, r)) - if len(kernels)==0: + if len(kernels) == 0: continue continue k0 = kernels[0] fig = plt.figure() plt.xlabel(r'$x$') - plt.ylabel(r'$\Lambda_{'+'{},{}'.format(p,'r')+'}$') - X = np.linspace(-k0.Ms-1,+k0.Ms+1,1000) - s = plt.subplot(1,1,1, label=i) - for i,k in enumerate(kernels): - s.plot(X,k(X),label=r'$\Lambda_{'+'{},{}'.format(p,k.r)+'}$') - s.plot(k0.I,k0.H,'or') + plt.ylabel(r'$\Lambda_{'+'{},{}'.format(p, 'r')+'}$') + X = np.linspace(-k0.Ms-1, +k0.Ms+1, 1000) + s = plt.subplot(1, 1, 1, label=i) + for i, k in enumerate(kernels): + s.plot(X, k(X), label=r'$\Lambda_{'+'{},{}'.format(p, k.r)+'}$') + s.plot(k0.I, k0.H, 'or') axe_scaling = 0.10 ylim = s.get_ylim() Ly = ylim[1] - ylim[0] - s.set_ylim(ylim[0]-axe_scaling*Ly,ylim[1]+axe_scaling*Ly) + s.set_ylim(ylim[0]-axe_scaling*Ly, ylim[1]+axe_scaling*Ly) s.legend() plt.show(block=True) diff --git a/hysop/numerics/splitting/directional_splitting.py b/hysop/numerics/splitting/directional_splitting.py index 2a2228b9e..5a61b1d74 100644 --- a/hysop/numerics/splitting/directional_splitting.py +++ b/hysop/numerics/splitting/directional_splitting.py @@ -9,8 +9,8 @@ class DirectionalSplitting(ComputationalGraphNodeGenerator): @debug def __new__(cls, splitting_dim, extra_kwds, **kargs): - return super(DirectionalSplitting, cls).__new__(cls, - candidate_input_tensors=None, candidate_output_tensors=None, **kargs) + return super(DirectionalSplitting, cls).__new__( + cls, candidate_input_tensors=None, candidate_output_tensors=None, **kargs) @debug def __init__(self, splitting_dim, extra_kwds, **kargs): @@ -30,7 +30,7 @@ class DirectionalSplitting(ComputationalGraphNodeGenerator): raise TypeError(msg) if (splitting_dim not in supported_dimensions): msg = '{}D is not supported by this splitting, supported dimensions are {}.' - msg = msg.format(dim, supported_dimensions) + msg = msg.format(splitting_dim, supported_dimensions) raise ValueError(msg) @debug diff --git a/hysop/numerics/stencil/stencil_generator.py b/hysop/numerics/stencil/stencil_generator.py index 7e2d9e120..8457ee3f7 100644 --- a/hysop/numerics/stencil/stencil_generator.py +++ b/hysop/numerics/stencil/stencil_generator.py @@ -4,24 +4,28 @@ * :class:`~hysop.numerics.stencil.StencilGenerator` """ -import fractions, os, copy, math, gzip +import fractions +import os +import copy +import math +import gzip import itertools as it import numpy as np import scipy as sp import sympy as sm try: - import cPickle as pickle + import cPickle as pickle except: - import pickle + import pickle -from hysop.tools.misc import prod -from hysop.tools.io_utils import IO -from hysop.tools.numerics import MPQ, MPZ, MPFR, F2Q, mpqize, mpq, mpz -from hysop.tools.types import extend_array -from hysop.tools.cache import update_cache, load_data_from_cache +from hysop.tools.misc import prod +from hysop.tools.io_utils import IO +from hysop.tools.numerics import MPQ, MPZ, MPFR, F2Q, mpqize, mpq, mpz +from hysop.tools.types import extend_array +from hysop.tools.cache import update_cache, load_data_from_cache from hysop.tools.sympy_utils import tensor_symbol, tensor_xreplace, \ - factor_split, build_eqs_from_dicts + factor_split, build_eqs_from_dicts from hysop.numerics.stencil.stencil import Stencil, CenteredStencil @@ -32,44 +36,45 @@ except ImportError: flint = None has_flint = False + class StencilGeneratorConfiguration(object): def __init__(self): - self.dim = 1 + self.dim = 1 self.dtype = MPQ - self.dx = sm.Symbol('dx') + self.dx = sm.Symbol('dx') self.user_eqs = {} self.derivative = 1 - self.order = 1 + self.order = 1 self.mask_type = StencilGenerator.CROSS - self._mask = None + self._mask = None self._format_inputs() def copy(self): obj = StencilGeneratorConfiguration() for var in ['dim', 'dtype', 'dx', 'user_eqs', 'derivative', 'order', 'mask_type', - '_mask']: + '_mask']: setattr(obj, var, copy.deepcopy(getattr(self, var))) return obj def configure(self, dim=None, dtype=None, dx=None, user_eqs=None, - derivative=None, order=None, - mask=None, mask_type=None): + derivative=None, order=None, + mask=None, mask_type=None): """ Configure the stencil generator. """ - self.dim = dim if (dim is not None) else self.dim - self.dtype = dtype if (dtype is not None) else self.dtype - self.dx = dx if (dx is not None) else self.dx - self.user_eqs = user_eqs if (user_eqs is not None) else self.user_eqs + self.dim = dim if (dim is not None) else self.dim + self.dtype = dtype if (dtype is not None) else self.dtype + self.dx = dx if (dx is not None) else self.dx + self.user_eqs = user_eqs if (user_eqs is not None) else self.user_eqs self.derivative = derivative if (derivative is not None) else self.derivative - self.order = order if (order is not None) else self.order - self.mask_type = mask_type if (mask_type is not None) else self.mask_type - self._mask = mask if (mask is not None) else self._mask + self.order = order if (order is not None) else self.order + self.mask_type = mask_type if (mask_type is not None) else self.mask_type + self._mask = mask if (mask is not None) else self._mask self._format_inputs() return self @@ -78,18 +83,18 @@ class StencilGeneratorConfiguration(object): raise RuntimeError('First set derivative and order with configure!') return self.derivative + self.order - def L(self,origin): - StencilGeneratorConfiguration._check_origin(origin,self.dim) - shape = self.shape() + def L(self, origin): + StencilGeneratorConfiguration._check_origin(origin, self.dim) + shape = self.shape() return origin - def R(self,origin): - StencilGeneratorConfiguration._check_origin(origin,self.dim) - shape = self.shape() + def R(self, origin): + StencilGeneratorConfiguration._check_origin(origin, self.dim) + shape = self.shape() return shape-origin-1 - def mask(self,origin,custom_mask=None): - StencilGeneratorConfiguration._check_origin(origin,self.dim) + def mask(self, origin, custom_mask=None): + StencilGeneratorConfiguration._check_origin(origin, self.dim) self._check_and_raise() if self.mask == StencilGenerator.CUSTOM: if (custom_mask is None) and (self._mask is None): @@ -103,84 +108,84 @@ class StencilGeneratorConfiguration(object): mask = StencilGeneratorConfiguration._genmask(origin, self.shape(), self.mask_type) return mask - def symbolic_derivatives(self,extra_size=0): - df,df_vars = tensor_symbol(prefix='f',shape=self.shape()+extra_size) - return df,df_vars + def symbolic_derivatives(self, extra_size=0): + df, df_vars = tensor_symbol(prefix='f', shape=self.shape()+extra_size) + return df, df_vars def symbolic_stencil(self, origin): - StencilGeneratorConfiguration._check_origin(origin,self.dim) - S,svars = tensor_symbol(prefix='S',mask=self.mask(origin),shape=self.shape(), - origin=origin) - return S,svars + StencilGeneratorConfiguration._check_origin(origin, self.dim) + S, svars = tensor_symbol(prefix='S', mask=self.mask(origin), shape=self.shape(), + origin=origin) + return S, svars def _format_inputs(self): - if isinstance(self.dx,sm.Symbol) or isinstance(self.dx,sm.Expr) or np.isscalar(self.dx): + if isinstance(self.dx, sm.Symbol) or isinstance(self.dx, sm.Expr) or np.isscalar(self.dx): self.dx = np.asarray([self.dx]*self.dim) dim = self.dim self.derivative = extend_array(self.derivative, dim, dtype=int) - self.order = extend_array(self.order, dim, dtype=int) - self.dx = extend_array(self.dx, dim, dtype=object) + self.order = extend_array(self.order, dim, dtype=int) + self.dx = extend_array(self.dx, dim, dtype=object) def _check_and_raise(self): self._format_inputs() - if self.dim<1: + if self.dim < 1: raise ValueError('Dim < 1!') if self.dtype not in [MPQ, float, np.float32, np.float64]: - raise TypeError('Invalid dtype {}!'.format(dtype)) + raise TypeError('Invalid dtype {}!'.format(self.dtype)) if not isinstance(self.dx, np.ndarray): raise TypeError('Invalid type for dx!') if self.dx.size != self.dim: raise ValueError('Invalid size for dx!') - if not isinstance(self.user_eqs,dict): + if not isinstance(self.user_eqs, dict): raise TypeError('Invalid type for user_eqs!') for k in self.user_eqs: - if not isinstance(k,sm.Symbol): + if not isinstance(k, sm.Symbol): raise TypeError('Invalid type for key {} in user_eqs!'.format(k)) - if (self.derivative<0).any(): + if (self.derivative < 0).any(): raise ValueError('derivative < 0!') - if (self.order<1).any(): + if (self.order < 1).any(): raise ValueError('order < 1!') @staticmethod - def _check_origin(origin,dim): + def _check_origin(origin, dim): if not isinstance(origin, np.ndarray): raise TypeError('Origin is not a np.ndarray!') if origin.size != dim: raise ValueError('Bad dimensions for origin!') - if (origin<0).any(): + if (origin < 0).any(): raise ValueError('Origin component < 0!') @staticmethod - def _genmask(origin,shape,mask): + def _genmask(origin, shape, mask): dim = origin.size - if mask==StencilGenerator.CROSS: - mask = np.zeros(shape,dtype=bool) + if mask == StencilGenerator.CROSS: + mask = np.zeros(shape, dtype=bool) for d in range(dim): - access = [slice(origin[dd],origin[dd]+1) for dd in range(dim)] + access = [slice(origin[dd], origin[dd]+1) for dd in range(dim)] access[d] = slice(None) access = tuple(access) mask[access] = True mask[tuple(access)] = True - elif mask==StencilGenerator.DIAG: - mask = np.ones(shape,dtype=bool) + elif mask == StencilGenerator.DIAG: + mask = np.ones(shape, dtype=bool) for d in range(dim): - access = [slice(origin[dd],origin[dd]+1) for dd in range(dim)] + access = [slice(origin[dd], origin[dd]+1) for dd in range(dim)] access[d] = slice(None) access = tuple(access) mask[access] = False mask[tuple(access)] = False mask[origin] = True - elif mask==StencilGenerator.DENSE: - mask = np.ones(shape,dtype=bool) + elif mask == StencilGenerator.DENSE: + mask = np.ones(shape, dtype=bool) else: raise NotImplementedError('Mask not implemented yet!') return mask def __str__(self): - ss=\ -''' + ss =\ + ''' StencilGeneratorConfiguration dim: {} dtype: {} @@ -192,8 +197,8 @@ StencilGeneratorConfiguration mask: {} shape: {} '''.format(self.dim, self.dtype, self.dx, self.user_eqs, - self.derivative, self.order, self.mask_type, self._mask, - self.shape()) + self.derivative, self.order, self.mask_type, self._mask, + self.shape()) return ss @@ -203,15 +208,15 @@ class StencilGenerator(object): Results are cached and compressed locally. """ - #Stencil masks - DENSE = 0 - CROSS = 1 - DIAG = 2 + # Stencil masks + DENSE = 0 + CROSS = 1 + DIAG = 2 CUSTOM = 99 @classmethod def cache_file(cls): - _cache_dir = IO.cache_path() + '/numerics' + _cache_dir = IO.cache_path() + '/numerics' _cache_file = _cache_dir + '/stencil.pklz' return _cache_file @@ -294,9 +299,9 @@ class StencilGenerator(object): :class:`Stencil` """ config = StencilGeneratorConfiguration() if (config is None) else config - if not isinstance(config,StencilGeneratorConfiguration): + if not isinstance(config, StencilGeneratorConfiguration): raise TypeError('config is not an instance of StencilGeneratorConfiguration!') - self._config = config + self._config = config self._cache_override = cache_override @staticmethod @@ -341,10 +346,10 @@ class StencilGenerator(object): config = self._config.copy() config.configure(**kargs) - dim = config.dim - dtype = config.dtype + dim = config.dim + dtype = config.dtype - if (dim!=1): + if (dim != 1): raise ValueError('Bad dimension for approximation stencil generation!') if (has_flint): @@ -354,25 +359,25 @@ class StencilGenerator(object): else: solve_dtype = dtype - dx = config.dx[0] - k = config.derivative[0] - order = config.order[0] + dx = config.dx[0] + k = config.derivative[0] + order = config.order[0] - N = config.shape()[0] - origin = StencilGenerator._format_origin(origin,N) + N = config.shape()[0] + origin = StencilGenerator._format_origin(origin, N) - L = config.L(origin) - R = config.R(origin) + L = config.L(origin) + R = config.R(origin) if k == 0: - return Stencil([1],[0],0,dx=dx,error=None) + return Stencil([1], [0], 0, dx=dx, error=None) - A = np.empty((N,N),dtype=solve_dtype) - b = np.empty(N,dtype=solve_dtype) + A = np.empty((N, N), dtype=solve_dtype) + b = np.empty(N, dtype=solve_dtype) for i in range(N): - b[i] = solve_dtype(int(i==k)) + b[i] = solve_dtype(int(i == k)) for j in range(N): - A[i,j] = solve_dtype(int((j-origin)**i)) + A[i, j] = solve_dtype(int((j-origin)**i)) try: if has_flint: @@ -382,7 +387,7 @@ class StencilGenerator(object): Ainv = np.asarray(Afmpq_inv.entries()).reshape(A.shape) S = Ainv.dot(b) else: - S = sp.linalg.solve(A,b,overwrite_a=True,overwrite_b=True) + S = sp.linalg.solve(A, b, overwrite_a=True, overwrite_b=True) except: print('\nError: Cannot generate stencil (singular system).\n') raise @@ -399,20 +404,19 @@ class StencilGenerator(object): S = np.vectorize(convert)(S) else: S = S.astype(target_dtype) - elif target_dtype==MPQ: + elif target_dtype == MPQ: if has_flint and (actual_dtype is flint.fmpq): def convert(x): return mpq(mpz(x.p.str()), mpz(x.q.str())) else: def convert(x): - frac = fractions.Fraction(x).limit_denominator((1<<32)-1) + frac = fractions.Fraction(x).limit_denominator((1 << 32)-1) return mpq(frac.numerator, frac.denominator) S = np.vectorize(convert)(S) else: RuntimeError('Type conversion not implemented yet.') - return Stencil(S,origin,order,factor=1/(dx**k), dx=dx) - + return Stencil(S, origin, order, factor=1/(dx**k), dx=dx) def generate_exact_stencil(self, origin, **kargs): """ @@ -425,69 +429,69 @@ class StencilGenerator(object): config.configure(**kargs) config._check_and_raise() - dx = config.dx - dim = config.dim - dtype = config.dtype - order = config.order + dx = config.dx + dim = config.dim + dtype = config.dtype + order = config.order derivative = config.derivative - N = config.shape() - origin = StencilGenerator._format_origin(origin,N) + N = config.shape() + origin = StencilGenerator._format_origin(origin, N) - L = config.L(origin) - R = config.R(origin) + L = config.L(origin) + R = config.R(origin) df, df_vars = config.symbolic_derivatives(4) - S,svars = config.symbolic_stencil(origin) + S, svars = config.symbolic_stencil(origin) user_eqs = config.user_eqs if all(derivative == 0): - return Stencil([1],[0],0,dx=dx,error=None) + return Stencil([1], [0], 0, dx=dx, error=None) - if len(user_eqs)==0: - for i,d in enumerate(derivative): - if dim>1: - access = [slice(0,1) for _ in range(dim)] - access[i] = slice(d,d+1) + if len(user_eqs) == 0: + for i, d in enumerate(derivative): + if dim > 1: + access = [slice(0, 1) for _ in range(dim)] + access[i] = slice(d, d+1) access = tuple(access) - user_eqs[df[access].ravel()[0]]=1 + user_eqs[df[access].ravel()[0]] = 1 else: user_eqs[df[d]] = 1 - def taylor(df,dx,N): + def taylor(df, dx, N): expr = 0 for n in range(max(N)): - expr += taylorn(df,dx,n,N) + expr += taylorn(df, dx, n, N) return expr - def taylorn(df,dx,n,N): + def taylorn(df, dx, n, N): dim = dx.size def preficate(it): - return (it<=N).all() and sum(it)==n + return (it <= N).all() and sum(it) == n - nn = range(n+1) + nn = range(n+1) itd = it.product(nn, repeat=dim) itd = filter(preficate, itd) expr = 0 for der in itd: - expr += taylorn_term(df,dx,der) + expr += taylorn_term(df, dx, der) return expr - def taylorn_term(df,dx,der): + def taylorn_term(df, dx, der): fact = np.asarray([math.factorial(d) for d in der]) return df[der]*prod((dx**der)/fact) expr = 0 for ids in np.ndindex(*N): offset = ids-origin - expr += S[ids]*taylor(df,offset*dx,N) + expr += S[ids]*taylor(df, offset*dx, N) - fact = factor_split(expr,df_vars) - eqs = build_eqs_from_dicts(fact,user_eqs) + fact = factor_split(expr, df_vars) + eqs = build_eqs_from_dicts(fact, user_eqs) - stencil = Stencil(S,origin,order) + stencil = Stencil(S, origin, order) i = 0 sol = {} cache_file = self.cache_file() @@ -495,7 +499,7 @@ class StencilGenerator(object): eqs_key = StencilGenerator._hash_equations(eqs) nsol = load_data_from_cache(cache_file, eqs_key) if (self._cache_override) or (nsol is None): - nsol = sm.solve(eqs,svars) + nsol = sm.solve(eqs, svars) update_cache(cache_file, eqs_key, nsol) if not nsol: break @@ -503,43 +507,45 @@ class StencilGenerator(object): S = tensor_xreplace(S, sol) err = 0 - while err==0: + while err == 0: for ids in np.ndindex(*N): offset = ids-origin - err += S[ids]*taylorn(df,offset*dx,max(N)+i,N+i+1) - if err!=0: - order+=1 - i+=1 + err += S[ids]*taylorn(df, offset*dx, max(N)+i, N+i+1) + if err != 0: + order += 1 + i += 1 eqs = [] - for k,eq in factor_split(err,df_vars).items(): - if isinstance(eq,int): + for k, eq in factor_split(err, df_vars).items(): + if isinstance(eq, int): continue eq = sm.simplify(eq.xreplace(sol)) if eq.atoms(sm.Symbol).intersection(svars): eqs.append(eq) - stencil = Stencil(S,origin,order,dx=dx,error=err) + stencil = Stencil(S, origin, order, dx=dx, error=err) - if (dx==dx[0]).all() and dx[0]!=1: + if (dx == dx[0]).all() and dx[0] != 1: stencil.refactor(dx[0]**(-derivative[0])) return stencil @staticmethod - def _format_origin(origin,shape): - dim = shape.size + def _format_origin(origin, shape): + dim = shape.size origin = np.asarray([origin]*dim, dtype=int) if np.isscalar(origin) else np.asarray(origin) if origin.size != dim: raise ValueError('Bad input dimensions!') - return (origin+shape)%shape + return (origin+shape) % shape + class CenteredStencilGenerator(StencilGenerator): """ Generate various centered stencils. Results are cached and compressed locally. """ + def __init__(self, config=None, cache_override=False): - super(CenteredStencilGenerator,self).__init__(config, cache_override) + super(CenteredStencilGenerator, self).__init__(config, cache_override) def generate_exact_stencil(self, **kargs): config = self._config.copy() @@ -547,8 +553,8 @@ class CenteredStencilGenerator(StencilGenerator): shape = config.shape() origin = (shape-1)//2 - stencil = super(CenteredStencilGenerator,self)\ - .generate_exact_stencil(origin=origin, **kargs) + stencil = super(CenteredStencilGenerator, self)\ + .generate_exact_stencil(origin=origin, **kargs) if stencil.is_centered(): return CenteredStencil.from_stencil(stencil) else: @@ -560,16 +566,14 @@ class CenteredStencilGenerator(StencilGenerator): shape = config.shape() origin = (shape-1)//2 - stencil = super(CenteredStencilGenerator,self)\ - .generate_approximative_stencil(origin, **kargs) + stencil = super(CenteredStencilGenerator, self)\ + .generate_approximative_stencil(origin, **kargs) if stencil.is_centered(): return CenteredStencil.from_stencil(stencil) else: raise RuntimeError('Generated stencil is not centered: {}'.format(stencil.coeffs)) - - if __name__ == '__main__': from hysop.tools.contexts import printoptions @@ -612,24 +616,24 @@ if __name__ == '__main__': print(laplacian.coeffs) laplacian = sg.generate_exact_stencil(origin=1, dx=[sm.Symbol('dy'), sm.Symbol('dx')]) - print('\n',laplacian.coeffs) + print('\n', laplacian.coeffs) - df,_ = sg.get_config().symbolic_derivatives() - user_eqs = {df[0][2] : sm.Symbol('Cx'), df[2][0] : sm.Symbol('Cy')} - stencil = sg.generate_exact_stencil(origin=1,user_eqs=user_eqs) - print('\n',stencil.coeffs) + df, _ = sg.get_config().symbolic_derivatives() + user_eqs = {df[0][2]: sm.Symbol('Cx'), df[2][0]: sm.Symbol('Cy')} + stencil = sg.generate_exact_stencil(origin=1, user_eqs=user_eqs) + print('\n', stencil.coeffs) sg = CenteredStencilGenerator() sg.configure(derivative=2) print('\nCentered second order derivative stencils:') - for i in range(1,4): - stencil = sg.generate_approximative_stencil(order=2*i, dtype=np.float16) + for i in range(1, 4): + stencil = sg.generate_approximative_stencil(origin=0, order=2*i, dtype=np.float16) print(' {}'.format(stencil.coeffs)) print() - for i in range(1,4): - stencil = sg.generate_approximative_stencil(order=2*i, dtype=MPQ) + for i in range(1, 4): + stencil = sg.generate_approximative_stencil(origin=0, order=2*i, dtype=MPQ) print(' {}'.format(stencil.coeffs)) print() - for i in range(1,4): - stencil = sg.generate_exact_stencil(order=2*i) + for i in range(1, 4): + stencil = sg.generate_exact_stencil(origin=0, order=2*i) print(' {}'.format(stencil.coeffs)) diff --git a/hysop/operator/base/integrate.py b/hysop/operator/base/integrate.py index 7a2c1b803..e62bb81fb 100644 --- a/hysop/operator/base/integrate.py +++ b/hysop/operator/base/integrate.py @@ -10,7 +10,7 @@ from hysop.fields.continuous_field import Field from hysop.core.memory.memory_request import MemoryRequest from hysop.topology.cartesian_descriptor import CartesianTopologyDescriptors from hysop.parameters.scalar_parameter import ScalarParameter, TensorParameter -from hysop.default import VolumicIntegrationParameter +from hysop.parameters.default_parameters import VolumicIntegrationParameter class IntegrateBase(object, metaclass=ABCMeta): diff --git a/hysop/operator/base/redistribute_operator.py b/hysop/operator/base/redistribute_operator.py index b43c3bb72..77c09c09e 100644 --- a/hysop/operator/base/redistribute_operator.py +++ b/hysop/operator/base/redistribute_operator.py @@ -32,13 +32,13 @@ class RedistributeOperatorBase(ComputationalGraphOperator, metaclass=ABCMeta): """ return Backend.all - def __new__(cls, variable, source_topo, target_topo, + def __new__(cls, variable, source_topo, target_topo, components=None, name=None, pretty_name=None, **kwds): return super(RedistributeOperatorBase, cls).__new__(cls, name=name, pretty_name=pretty_name, input_fields=None, output_fields=None, **kwds) - def __init__(self, variable, source_topo, target_topo, + def __init__(self, variable, source_topo, target_topo, components=None, name=None, pretty_name=None, **kwds): """ Parameters diff --git a/hysop/operator/integrate.py b/hysop/operator/integrate.py index bd6016e0a..7f578c4fb 100644 --- a/hysop/operator/integrate.py +++ b/hysop/operator/integrate.py @@ -44,60 +44,6 @@ class Integrate(ComputationalGraphNodeFrontend): parameter=parameter, scaling=scaling, expr=expr, base_kwds=base_kwds, **kwds) - @debug - def __new__(cls, field, variables, - parameter=None, scaling=None, - base_kwds=None, expr=None, **kwds): - """ - Initialize a Integrate operator frontend. - - Integrate a field on it compute domain and put the result in a parameter. - - in: field - Possibly as multi-component field that should be integrated. - out: parameter - P = scaling * integral_V(field) - where V is the field domain volume - and scaling depends on specified scaling method. - - parameter - ---------- - field: Field - Input continuous field to be integrated. - variables: dict - dictionary of fields as keys and topologies as values. - parameter: ScalarParameter or TensorParameter - The output parameter that will contain the integral. - Should match field.nb_components. - A default parameter will be created if not specified. - scaling: None, float, str or array-like of str, optional - Scaling method used after integration. - 'volumic': scale by domain size (product of mesh space steps) - 'normalize': scale by first integration (first value will be 1.0) - Defaults to volumic integration. - expr: None, str, optional - expression performed on each entry of the array before sum, elements are referenced as `x[i]` - implementation: Implementation, optional, defaults to None - target implementation, should be contained in available_implementations(). - If None, implementation will be set to default_implementation(). - base_kwds: dict, optional, defaults to None - Base class keywords arguments. - If None, an empty dict will be passed. - kwds: - Extra keywords arguments that will be passed towards implementation - enstrophy operator __init__. - - Notes - ----- - An Integrate operator implementation should at least support - the hysop.operator.base.integrate.IntegrateBase interface. - """ - base_kwds = first_not_None(base_kwds, {}) - return super(Integrate, cls).__new__(cls, - field=field, variables=variables, - parameter=parameter, scaling=scaling, expr=expr, - base_kwds=base_kwds, **kwds) - @debug def __init__(self, field, variables, parameter=None, scaling=None, diff --git a/hysop/operator/memory_reordering.py b/hysop/operator/memory_reordering.py index 18779f0ee..08f1a378f 100644 --- a/hysop/operator/memory_reordering.py +++ b/hysop/operator/memory_reordering.py @@ -145,7 +145,7 @@ class MemoryReordering(ComputationalGraphNodeGenerator): base_kwds = first_not_None(base_kwds, {}) if (not 'mpi_params' in base_kwds): - mpi_params = variables.values()[0].mpi_params + mpi_params = next(iter(variables.values())).mpi_params assert all([_.mpi_params == mpi_params for _ in variables.values()]) kwds.update({'mpi_params': mpi_params}) diff --git a/hysop/parameters/parameter.py b/hysop/parameters/parameter.py index 499beb96f..8aff308c1 100644 --- a/hysop/parameters/parameter.py +++ b/hysop/parameters/parameter.py @@ -103,10 +103,10 @@ class Parameter(TaggedObject, VariableTag, metaclass=ABCMeta): return obj def __init__(self, name, parameter_types, - initial_value=None, allow_None=False, - quiet=False, const=False, - pretty_name=None, var_name=None, - is_view=False, **kwds): + initial_value=None, allow_None=False, + quiet=False, const=False, + pretty_name=None, var_name=None, + is_view=False, **kwds): super(Parameter, self).__init__(tag_prefix='p', variable_kind=Variable.PARAMETER, **kwds) def __eq__(self, other): @@ -185,7 +185,7 @@ class Parameter(TaggedObject, VariableTag, metaclass=ABCMeta): pass @abstractmethod - def _set_value_impl(self): + def _set_value_impl(self, value): pass @abstractmethod diff --git a/hysop/simulation.py b/hysop/simulation.py index 6738824bc..115507a53 100644 --- a/hysop/simulation.py +++ b/hysop/simulation.py @@ -28,7 +28,8 @@ Usage s.finalize() io.apply(s) """ -import sys, os +import sys +import os import numpy as np from abc import ABCMeta, abstractmethod @@ -237,7 +238,7 @@ class Simulation(object): f.write(values) f.flush() else: - msg = 'Unknown format {}.'.format(fileformat) + msg = 'Unknown format {}.'.format(io_params.fileformat) raise ValueError(msg) if self._next_is_last: diff --git a/hysop/symbolic/frame.py b/hysop/symbolic/frame.py index 924d04914..748d974e1 100644 --- a/hysop/symbolic/frame.py +++ b/hysop/symbolic/frame.py @@ -1,6 +1,6 @@ - from hysop.tools.types import first_not_None, check_instance, to_tuple -from hysop.symbolic import dspace_symbols, space_symbols, freq_symbols, time_symbol +from hysop.symbolic import dspace_symbols, space_symbols, freq_symbols, time_symbol, dtime_symbol + class SymbolicFrame(object): """n-dimensional symbolic frame.""" @@ -8,7 +8,7 @@ class SymbolicFrame(object): def __init__(self, dim, freq_axes=None, **kwds): """Initialize a frame with given dimension.""" super(SymbolicFrame, self).__init__(**kwds) - assert dim>0, 'Incompatible dimension.' + assert dim > 0, 'Incompatible dimension.' coords = list(space_symbols[:dim]) if (freq_axes is not None): @@ -43,7 +43,6 @@ class SymbolicFrame(object): """Get the time variable for conveniance.""" return time_symbol - @property def dtime(self): """Get the infinitesimal time variable for conveniance.""" @@ -58,16 +57,16 @@ class SymbolicFrame(object): return self.coords[key] def __str__(self): - ss ='SymbolicFrame:' - ss+='\n *dim: {}' - ss+='\n *coords: {}' - ss+='\n *dcoords: {}' - ss+='\n *vars: {}' - ss=ss.format(self.dim, self.coords, self.dcoords, self.vars) + ss = 'SymbolicFrame:' + ss += '\n *dim: {}' + ss += '\n *coords: {}' + ss += '\n *dcoords: {}' + ss += '\n *vars: {}' + ss = ss.format(self.dim, self.coords, self.dcoords, self.vars) return ss -if __name__ == '__main__': +if __name__ == '__main__': A = SymbolicFrame(8) print(A.dim) print(A.coords) diff --git a/hysop/symbolic/func.py b/hysop/symbolic/func.py index dd3978c1b..b930b7c51 100644 --- a/hysop/symbolic/func.py +++ b/hysop/symbolic/func.py @@ -31,11 +31,11 @@ class SymbolicFunction(FunctionBase, UndefinedFunction): def __new__(cls, name, fn=None, bases=None, **kwds): bases = first_not_None(bases, (AppliedSymbolicFunction,)) return super(SymbolicFunction, cls).__new__(cls, bases=bases, - name=name, fn=fn, **kwds) + name=name, fn=fn, **kwds) def __init__(self, name, fn=None, bases=None, **kwds): super(SymbolicFunction, self).__init__(bases=bases, - name=name, fn=fn, **kwds) + name=name, fn=fn, **kwds) class AppliedSymbolicFunction(AppliedUndef): @@ -59,17 +59,17 @@ class AppliedSymbolicFunction(AppliedUndef): class SymbolicFunctionTensor(TensorBase): """Symbolic tensor symbol.""" def __new__(cls, shape, name=None, fn=None, init=None, - scalar_cls=None, scalar_kwds=None, **kwds): + scalar_cls=None, scalar_kwds=None, **kwds): scalar_cls = first_not_None(scalar_cls, SymbolicFunction) scalar_kwds = first_not_None(scalar_kwds, {}) scalar_kwds.setdefault('fn', fn) return super(SymbolicFunctionTensor, cls).__new__(cls, name=name, shape=shape, init=init, - scalar_cls=scalar_cls, scalar_kwds=scalar_kwds, **kwds) + scalar_cls=scalar_cls, scalar_kwds=scalar_kwds, **kwds) def __init__(self, shape, name=None, fn=None, init=None, - scalar_cls=None, scalar_kwds=None, **kwds): + scalar_cls=None, scalar_kwds=None, **kwds): super(SymbolicFunctionTensor, self).__init__(name=name, shape=None, init=None, - scalar_cls=scalar_cls, scalar_kwds=scalar_kwds, **kwds) + scalar_cls=scalar_cls, scalar_kwds=scalar_kwds, **kwds) def __call__(self, *args, **kwds): return self.elementwise_fn(lambda x: x(*args, **kwds)) @@ -77,8 +77,9 @@ class SymbolicFunctionTensor(TensorBase): def freplace(self): return self.elementwise_fn(lambda x: x.freplace()) + if __name__ == '__main__': - def fn(x0,x1): + def fn(x0, x1): return x0-x1 f = SymbolicFunction('f', fn=sm.cos) @@ -93,8 +94,8 @@ if __name__ == '__main__': I = i(5, space_symbols[1]) J = j(time_symbol, space_symbols[0]) - a = SymbolicFunctionTensor('a', shape=(2,2)) - b = SymbolicFunctionTensor('b', shape=(4,), fn=sm.cos) + a = SymbolicFunctionTensor(name='a', shape=(2, 2)) + b = SymbolicFunctionTensor(name='b', shape=(4,), fn=sm.cos) A = a(time_symbol) B = b(space_symbols[0]) print(f) @@ -127,4 +128,3 @@ if __name__ == '__main__': print() print(A.freplace()) print(B.freplace()) - diff --git a/hysop/symbolic/spectral.py b/hysop/symbolic/spectral.py index 8ba27630e..14491da76 100644 --- a/hysop/symbolic/spectral.py +++ b/hysop/symbolic/spectral.py @@ -1,4 +1,3 @@ - import sympy as sm import numpy as np @@ -9,11 +8,12 @@ from hysop.tools.spectral_utils import SpectralTransformUtils as STU from hysop.symbolic import SpaceSymbol from hysop.symbolic.array import SymbolicBuffer from hysop.symbolic.field import FieldExpressionBuilder, FieldExpressionI, TensorBase, \ - SymbolicField, AppliedSymbolicField + SymbolicField, AppliedSymbolicField from hysop.symbolic.frame import SymbolicFrame from hysop.fields.continuous_field import Field, ScalarField, TensorField from hysop.tools.spectral_utils import SpectralTransformUtils + class WaveNumberIndex(sm.Symbol): def __new__(cls, axis): obj = super(WaveNumberIndex, cls).__new__(cls, 'i{}'.format(axis)) @@ -35,7 +35,7 @@ class WaveNumberIndex(sm.Symbol): @property def real_index(self): if (self._real_index is None): - msg='No axes bound yet !' + msg = 'No axes bound yet !' raise RuntimeError(msg) return self._real_index @@ -44,26 +44,26 @@ class WaveNumber(Dummy): """Wave number symbol for SpectralTransform derivatives (and integrals).""" __transform2str = { - TransformType.FFT: 'c2c', - TransformType.RFFT: 'r2c', - TransformType.DCT_I: 'c1', - TransformType.DCT_II: 'c2', - TransformType.DCT_III: 'c3', - TransformType.DCT_IV: 'c4', - TransformType.DST_I: 's1', - TransformType.DST_II: 's2', - TransformType.DST_III: 's3', - TransformType.DST_IV: 's4', - TransformType.IFFT: 'c2c', - TransformType.IRFFT: 'r2c', - TransformType.IDCT_I: 'c1', - TransformType.IDCT_II: 'c3', - TransformType.IDCT_III: 'c2', - TransformType.IDCT_IV: 'c4', - TransformType.IDST_I: 's1', - TransformType.IDST_II: 's3', - TransformType.IDST_III: 's2', - TransformType.IDST_IV: 's4', + TransformType.FFT: 'c2c', + TransformType.RFFT: 'r2c', + TransformType.DCT_I: 'c1', + TransformType.DCT_II: 'c2', + TransformType.DCT_III: 'c3', + TransformType.DCT_IV: 'c4', + TransformType.DST_I: 's1', + TransformType.DST_II: 's2', + TransformType.DST_III: 's3', + TransformType.DST_IV: 's4', + TransformType.IFFT: 'c2c', + TransformType.IRFFT: 'r2c', + TransformType.IDCT_I: 'c1', + TransformType.IDCT_II: 'c3', + TransformType.IDCT_III: 'c2', + TransformType.IDCT_IV: 'c4', + TransformType.IDST_I: 's1', + TransformType.IDST_II: 's3', + TransformType.IDST_III: 's2', + TransformType.IDST_IV: 's4', } __wave_numbers = {} @@ -84,7 +84,7 @@ class WaveNumber(Dummy): return cls.__wave_numbers[key] tr_str = cls.__transform2str[transform] - if len(tr_str)==2: + if len(tr_str) == 2: tr_pstr = tr_str[0] + subscript(int(tr_str[1])) else: tr_pstr = tr_str @@ -102,10 +102,10 @@ class WaveNumber(Dummy): pretty_name += '__{}'.format(exponent) obj = super(WaveNumber, cls).__new__(cls, - name=name, pretty_name=pretty_name, **kwds) - obj._axis = int(axis) + name=name, pretty_name=pretty_name, **kwds) + obj._axis = int(axis) obj._transform = transform - obj._exponent = int(exponent) + obj._exponent = int(exponent) cls.__wave_numbers[key] = obj @@ -117,16 +117,18 @@ class WaveNumber(Dummy): @property def axis(self): return self._axis + @property def transform(self): return self._transform + @property def exponent(self): return self._exponent @property def is_real(self): - tr = self._transform + tr = self._transform exp = self._exponent is_real = STU.is_R2R(tr) is_real |= ((not STU.is_R2R(tr)) and (exp % 2 == 0)) @@ -134,7 +136,7 @@ class WaveNumber(Dummy): @property def is_complex(self): - tr = self._transform + tr = self._transform exp = self._exponent return ((not STU.is_R2R(tr)) and (exp % 2 != 0)) @@ -153,9 +155,9 @@ class WaveNumber(Dummy): def __eq__(self, other): if not isinstance(other, WaveNumber): return NotImplemented - eq = (self.axis == other.axis) + eq = (self.axis == other.axis) eq &= (self.transform == other.transform) - eq &= (self.exponent == other.exponent) + eq &= (self.exponent == other.exponent) return eq def __hash__(self): @@ -166,16 +168,17 @@ class AppliedSpectralTransform(AppliedSymbolicField): """ An applied spectral transform. """ + def short_description(self): ss = '{}(field={}, axes={}, is_forward={}, transforms=[{}])' return ss.format(self.__class__.__name__, - self.field.pretty_name, self.transformed_axes, - '1' if self.is_forward else '0', - self.format_transforms()) + self.field.pretty_name, self.transformed_axes, + '1' if self.is_forward else '0', + self.format_transforms()) def long_description(self): ss = \ -''' + ''' == {} == *field: {} *transformed_axes: {} @@ -205,21 +208,27 @@ class AppliedSpectralTransform(AppliedSymbolicField): @property def field(self): return self._field + @property def transformed_axes(self): return self._transformed_axes + @property def spatial_axes(self): return self._spatial_axes + @property def freq_vars(self): return self._freq_vars + @property def space_vars(self): return self._space_vars + @property def all_vars(self): return self._all_vars + @property def frame(self): return self._frame @@ -227,12 +236,15 @@ class AppliedSpectralTransform(AppliedSymbolicField): @property def lboundaries(self): return self._field.lboundaries + @property def rboundaries(self): return self._field.rboundaries + @property def domain(self): return self._field.domain + @property def dtype(self): return self._field.dtype @@ -240,14 +252,15 @@ class AppliedSpectralTransform(AppliedSymbolicField): @property def transforms(self): return self._transforms + @property def wave_numbers(self): return self._wave_numbers + @property def is_forward(self): return self._is_forward - # SYMPY INTERNALS ################ @property def is_number(self): @@ -285,7 +298,7 @@ class AppliedSpectralTransform(AppliedSymbolicField): def __ne__(self, other): "Fix sympy v1.2 neq" - return not (self==other) + return not (self == other) ################################### @@ -312,13 +325,13 @@ class SpectralTransform(SymbolicField): check_instance(field, ScalarField) axes = to_tuple(first_not_None(axes, range(field.dim))) check_instance(axes, tuple, values=int, minval=0, - maxval=dim-1, minsize=1) + maxval=dim-1, minsize=1) transformed_axes = tuple(sorted(set(axes))) - spatial_axes = tuple(sorted(set(range(field.dim)) - set(axes))) + spatial_axes = tuple(sorted(set(range(field.dim)) - set(axes))) frame = field.domain.frame - freq_vars = tuple(frame.freqs[dim-1-i] for i in transformed_axes[::-1]) + freq_vars = tuple(frame.freqs[dim-1-i] for i in transformed_axes[::-1]) space_vars = tuple(frame.coords[dim-1-i] for i in spatial_axes[::-1]) all_vars = () @@ -330,7 +343,7 @@ class SpectralTransform(SymbolicField): all_vars = all_vars[::-1] transforms = SpectralTransformUtils.transforms_from_field(field, - transformed_axes=transformed_axes) + transformed_axes=transformed_axes) for i in range(frame.dim): assert (transforms[i] is TransformType.NONE) ^ (i in transformed_axes) @@ -342,7 +355,7 @@ class SpectralTransform(SymbolicField): assert frame.coords == all_vars obj = super(SpectralTransform, cls).__new__(cls, field=field, - bases=(AppliedSpectralTransform,)) + bases=(AppliedSpectralTransform,)) obj._field = field obj._transformed_axes = transformed_axes obj._spatial_axes = spatial_axes @@ -377,14 +390,14 @@ class SpectralTransform(SymbolicField): eq = super(SpectralTransform, self).__eq__(other) if (eq is not True): return eq - for (lhc,rhc) in zip((self._transformed_axes, self._is_forward), - (other._transformed_axes, other._is_forward)): + for (lhc, rhc) in zip((self._transformed_axes, self._is_forward), + (other._transformed_axes, other._is_forward)): eq &= (lhc == rhc) return eq def __ne__(self, other): "Fix sympy v1.2 neq" - return not (self==other) + return not (self == other) if __name__ == '__main__': @@ -400,16 +413,16 @@ if __name__ == '__main__': d = Box(dim=dim, lboundaries=(BoxBoundaryCondition.SYMMETRIC, BoxBoundaryCondition.OUTFLOW, BoxBoundaryCondition.SYMMETRIC), - rboundaries=(BoxBoundaryCondition.SYMMETRIC, - BoxBoundaryCondition.OUTFLOW, - BoxBoundaryCondition.OUTFLOW)) + rboundaries=(BoxBoundaryCondition.SYMMETRIC, + BoxBoundaryCondition.OUTFLOW, + BoxBoundaryCondition.OUTFLOW)) U = VelocityField(domain=d) W = VorticityField(velocity=U) psi = W.field_like(name='psi', pretty_name=Greak[23]) - W_hat = SpectralTransform(W, forward=True) - U_hat = SpectralTransform(U, forward=False) + W_hat = SpectralTransform(W, forward=True) + U_hat = SpectralTransform(U, forward=False) psi_hat = SpectralTransform(psi) eqs = laplacian(psi_hat, psi_hat.frame) - W_hat @@ -438,4 +451,3 @@ if __name__ == '__main__': for tr in trs: print(tr.short_description()) print(wn) - diff --git a/hysop/tools/enum.py b/hysop/tools/enum.py index e5aed8ad1..b2bf38b5b 100644 --- a/hysop/tools/enum.py +++ b/hysop/tools/enum.py @@ -2,17 +2,19 @@ * :class:`~EnumFactory` """ import numpy as np -import keyword, re +import keyword +import re registered_enums = {} + class _EnumInstanceGenerator(object): # pickle instance generator def __call__(self, enum_name, enum_field): if enum_name not in registered_enums: - msg='Pickle: trying to create an instance of {} but ' - msg+='this enum has not been registered yet.' - msg=msg.format(enum_name) + msg = 'Pickle: trying to create an instance of {} but ' + msg += 'this enum has not been registered yet.' + msg = msg.format(enum_name) raise RuntimeError(msg) enum_cls = registered_enums[enum_name] assert enum_field in enum_cls.fields().keys() @@ -79,8 +81,8 @@ class EnumFactory(object): """ if not fields: - msg='fields should contain at least one entry, got {}.' - msg=msg.format(fields) + msg = 'fields should contain at least one entry, got {}.' + msg = msg.format(fields) raise ValueError(msg) if isinstance(fields, dict): @@ -93,126 +95,132 @@ class EnumFactory(object): raise ValueError(msg) fields = dict(zip(fields, range(len(fields)))) else: - msg='fields have to be of type list,set,tuple or dict but got {}.' - msg=msg.format(fields.__class__.__name__) + msg = 'fields have to be of type list,set,tuple or dict but got {}.' + msg = msg.format(fields.__class__.__name__) raise TypeError(msg) if name in registered_enums.keys(): enum = registered_enums[name] - if any( [(f not in fields) for f in enum.fields().keys()] ): - msg='Enum \'{}\' was already created with different entries:' - msg+='\n\tregistered enum: {}\n\tnew values: {}' - msg=msg.format(name, enum.fields().keys(), fields.keys()) + if any([(f not in fields) for f in enum.fields().keys()]): + msg = 'Enum \'{}\' was already created with different entries:' + msg += '\n\tregistered enum: {}\n\tnew values: {}' + msg = msg.format(name, enum.fields().keys(), fields.keys()) raise ValueError(msg) - elif any([ fields[k] != v for (k,v) in enum.fields().items()]): - msg='Enum \'{}\' was already created with different values:' - msg+='\n\tregistered enum: {}\n\tnew values: {}' - msg=msg.format(name, enum.fields(), fields) + elif any([fields[k] != v for (k, v) in enum.fields().items()]): + msg = 'Enum \'{}\' was already created with different values:' + msg += '\n\tregistered enum: {}\n\tnew values: {}' + msg = msg.format(name, enum.fields(), fields) raise ValueError(msg) elif enum.dtype != dtype: - msg="Enum '{}' was already created with different dtype!" - msg+='\n\tregistered enum dtype: {}\n\tnew dtype: {}' - msg=msg.format(name, enum.dtype, dtype) + msg = "Enum '{}' was already created with different dtype!" + msg += '\n\tregistered enum dtype: {}\n\tnew dtype: {}' + msg = msg.format(name, enum.dtype, dtype) raise ValueError(msg) else: return enum for k in fields.keys(): if not isinstance(k, str): - msg='Enum keys should be strings, got {} of type {}.' - msg=msg.format(k,k.__class__.__name__) + msg = 'Enum keys should be strings, got {} of type {}.' + msg = msg.format(k, k.__class__.__name__) raise TypeError(msg) if keyword.iskeyword(k): - msg='Enum entry ({}) cannot be a python keyword.' - msg=msg.format(k) + msg = 'Enum entry ({}) cannot be a python keyword.' + msg = msg.format(k) raise ValueError(msg) - if not re.match('[_A-Za-z][_a-zA-Z0-9]*$',k): - msg='Enum entry ({}) has to be a valid python identifier.' - msg=msg.format(k) + if not re.match('[_A-Za-z][_a-zA-Z0-9]*$', k): + msg = 'Enum entry ({}) has to be a valid python identifier.' + msg = msg.format(k) raise ValueError(msg) - fields = dict(zip(fields.keys(), np.asarray(tuple(fields.values())).astype(dtype))) + fields = dict(zip(fields.keys(), np.asarray(tuple(fields.values())).astype(dtype))) rfields = dict(zip(fields.values(), fields.keys())) def __fields(cls): return cls._fields + def __rfields(cls): return cls._rfields - def __getitem__(cls,val): - if isinstance(val,str) and (val in cls._fields.keys()): + def __getitem__(cls, val): + if isinstance(val, str) and (val in cls._fields.keys()): return cls._fields[val] - elif (isinstance(val,int) or isinstance(val,dtype)) and (val in cls._rfields.keys()): + elif (isinstance(val, int) or isinstance(val, dtype)) and (val in cls._rfields.keys()): return getattr(cls, cls._rfields[val]) else: raise RuntimeError('Unknown entry {} of type {}.'.format(val, type(val))) + def __str__(cls): return name + def __repr__(cls): return name - def __value(cls,field): + def __value(cls, val): if field in cls._fields.keys(): return cls.dtype(cls._fields[val]) else: raise RuntimeError('Unknown field {} of type {}.'.format(val, type(val))) - def __svalue(cls,val): + + def __svalue(cls, val): if val in cls._rfields.keys(): return cls._rfields[val] else: raise RuntimeError('Unknown value {} of type {}.'.format(val, type(val))) - def __variable(cls,name,typegen,val=None,**kwds): + def __variable(cls, name, typegen, val=None, **kwds): from hysop.backend.device.codegen.base.variables import CodegenVariable, \ - dtype_to_ctype + dtype_to_ctype if val is None: val = cls.fields().values()[0] - value = cls.value(val) + value = cls.value(val) svalue = cls.svalue(val) - return CodegenVariable(name=name,typegen=typegen,ctype=dtype_to_ctype(cls.dtype), - value=value,svalue=svalue,**kwds) - def __array_variable(cls,name,typegen,vals,**kwds): + return CodegenVariable(name=name, typegen=typegen, ctype=dtype_to_ctype(cls.dtype), + value=value, svalue=svalue, **kwds) + + def __array_variable(cls, name, typegen, vals, **kwds): from hysop.backend.device.codegen.base.variables import dtype_to_ctype, \ - CodegenVariable, CodegenArray + CodegenVariable, CodegenArray assert vals is not None size = len(vals) - value = [getattr(cls,cls.svalue(v)) if isinstance(v, int) - else v for v in vals] + value = [getattr(cls, cls.svalue(v)) if isinstance(v, int) + else v for v in vals] svalue = [cls.svalue(v) if isinstance(v, int) else str(v) for v in vals] - if len(vals)==1: - return CodegenVariable(name=name,typegen=typegen,ctype=dtype_to_ctype(cls.dtype), - value=value[0],svalue=svalue[0],**kwds) + if len(vals) == 1: + return CodegenVariable(name=name, typegen=typegen, ctype=dtype_to_ctype(cls.dtype), + value=value[0], svalue=svalue[0], **kwds) else: - return CodegenArray(name=name,typegen=typegen,ctype=dtype_to_ctype(cls.dtype), - value=value,svalue=svalue,dim=1,**kwds) + return CodegenArray(name=name, typegen=typegen, ctype=dtype_to_ctype(cls.dtype), + value=value, svalue=svalue, dim=1, **kwds) - mcls_dic = {'name':name, - 'dtype':dtype, + mcls_dic = {'name': name, + 'dtype': dtype, - '_fields':fields, - '_rfields':rfields, + '_fields': fields, + '_rfields': rfields, - 'fields':__fields, - 'rfields':__rfields, + 'fields': __fields, + 'rfields': __rfields, - 'value':__value, - 'svalue':__svalue, - 'variable':__variable, - 'array_variable':__array_variable, + 'value': __value, + 'svalue': __svalue, + 'variable': __variable, + 'array_variable': __array_variable, - '__getitem__':__getitem__, - '__str__':__str__, - '__repr__':__repr__} + '__getitem__': __getitem__, + '__str__': __str__, + '__repr__': __repr__} mcls = type(name+'MetaEnum', (EnumFactory.MetaEnum,), mcls_dic) class Enum(base_cls, metaclass=mcls): def __init__(self, field=tuple(sorted(fields.keys()))[0]): - assert isinstance(field, str) and len(field)>0 + assert isinstance(field, str) and len(field) > 0 self._field = field self._value = self.__class__.dtype(self.__class__._fields[field]) def svalue(self): return self._field + def value(self): return self._value @@ -221,18 +229,21 @@ class EnumFactory(object): def __int__(self): return int(self.value()) + def __float__(self): return float(self.value()) def __str__(self): return self.svalue() + def __repr__(self): - return '{}({})'.format(self.svalue(),self.value()) + return '{}({})'.format(self.svalue(), self.value()) def __eq__(self, other): if not isinstance(other, self.__class__): return NotImplemented return self._value == other._value + def __ne__(self, other): if not isinstance(other, self.__class__): return NotImplemented @@ -247,18 +258,18 @@ class EnumFactory(object): generated_enum = type(name+'Enum', (Enum,), {}) _all = [] - for k,v in fields.items(): + for k, v in fields.items(): instance = generated_enum(field=k) setattr(mcls, k, instance) _all.append(instance) - setattr(generated_enum,'all',_all) + setattr(generated_enum, 'all', _all) registered_enums[name] = generated_enum return generated_enum if __name__ == '__main__': - fields = {'X':0,'Y':1,'Z':42} + fields = {'X': 0, 'Y': 1, 'Z': 42} TestEnum = EnumFactory.create('Test', fields) X = TestEnum() @@ -284,4 +295,3 @@ if __name__ == '__main__': print(repr(X), repr(Y), repr(Z)) print() print(TestEnum.dtype, type(X.value())) - diff --git a/hysop/tools/io_utils.py b/hysop/tools/io_utils.py index 716643cb7..8d76c06af 100755 --- a/hysop/tools/io_utils.py +++ b/hysop/tools/io_utils.py @@ -22,6 +22,7 @@ from collections import namedtuple from inspect import getouterframes, currentframe from re import findall +from hysop.constants import DirectionLabels from hysop.tools.types import first_not_None, check_instance from hysop.tools.parameters import MPIParams from hysop.tools.warning import HysopWarning @@ -605,7 +606,7 @@ class Writer(object): a1 = a[i:i+N, :] # put a1 in 1D array form; ravel better than reshape for # non-contiguous arrays. - a1 = ravel(a1) + a1 = np.ravel(a1) fileobj.write(str_fmt_N % tuple(a1)) for i in range(shape0 - shape0 % N, shape0): fileobj.write(str_fmt % tuple(a[i])) diff --git a/hysop/tools/misc.py b/hysop/tools/misc.py index 3d4022516..818c9c44f 100644 --- a/hysop/tools/misc.py +++ b/hysop/tools/misc.py @@ -7,15 +7,20 @@ """ -import inspect, functools, operator +import inspect +import functools +import operator import numpy as np from hysop.constants import HYSOP_REAL, HYSOP_INTEGER +from hysop.tools.numpywrappers import npw + def getargspec(func): spec = inspect.getfullargspec(func) return (spec.args, spec.varargs, spec.varkw, spec.defaults) + def prod(values): """ Like sum but for products (of integers). @@ -25,12 +30,14 @@ def prod(values): except: return np.prod(values) + def compute_nbytes(shape, dtype): from hysop.tools.numerics import get_itemsize nbytes = prod(shape) * get_itemsize(dtype) - assert nbytes>0 + assert nbytes > 0 return nbytes + def get_default_args(func): """ returns a dictionary of arg_name:default_values for the input function. @@ -41,30 +48,34 @@ def get_default_args(func): else: return dict(zip(args[-len(defaults):], defaults)) + def get_argnames(func): """ returns arguments name and possible varargs. """ - argnames,varargs,_,_ = getargspec(func) + argnames, varargs, _, _ = getargspec(func) return argnames, varargs + def args2kargs(func, args): - argnames,_,_,_ = getargspec(func) + argnames, _, _, _ = getargspec(func) return dict(zip(argnames, args)) + def kargs2args(func, kargs, remove=[]): - argnames,_,_,_ = getargspec(func) + argnames, _, _, _ = getargspec(func) return tuple([kargs[a] for a in argnames if a not in remove]) + def upper_pow2(x): def _upper_pow2(x): - if x<0: + if x < 0: raise RuntimeError('x<0') - i=0 - k=2**i - while k<x: - i+=1 - k*=2 + i = 0 + k = 2**i + while k < x: + i += 1 + k *= 2 return k if np.isscalar(x): @@ -74,12 +85,13 @@ def upper_pow2(x): else: return type(x)(_upper_pow2(_x) for _x in x) + def next_pow2(x): def _next_pow2(x): if x <= 0: return 1 y = upper_pow2(x) - if x==y: + if x == y: y = upper_pow2(x+1) return y @@ -90,11 +102,12 @@ def next_pow2(x): else: return type(x)(_next_pow2(_x) for _x in x) + def previous_pow2(x): def _previous_pow2(x): - assert x>=1 + assert x >= 1 y = upper_pow2(x)//2 - if x==y: + if x == y: y = upper_pow2(x-1)//2 return y @@ -105,19 +118,20 @@ def previous_pow2(x): else: return type(x)(_previous_pow2(_x) for _x in x) + def upper_pow2_or_3(x): if np.isscalar(x): - y = x if x==3 else upper_pow2(x) + y = x if x == 3 else upper_pow2(x) else: y = upper_pow2(x) - y[x==3] = 3 + y[x == 3] = 3 return y + class Utils(object): """tools to handle array and slices. """ - """ Perform an indirect sort of seq using python default sorting algorithm. It returns an array of indices of the same length as input seq. @@ -129,13 +143,13 @@ class Utils(object): @staticmethod def upper_pow2(x): def _upper_pow2(x): - if x<0: + if x < 0: raise RuntimeError('x<0') - i=0 - k=2**i - while k<x: - i+=1 - k*=2 + i = 0 + k = 2**i + while k < x: + i += 1 + k *= 2 return k if np.isscalar(x): @@ -143,7 +157,6 @@ class Utils(object): else: return np.asarray([_upper_pow2(_x) for _x in x]) - @staticmethod def array_to_dict(inarray): """ diff --git a/hysop/topology/cartesian_topology.py b/hysop/topology/cartesian_topology.py index 1ea7720e2..523a29aeb 100644 --- a/hysop/topology/cartesian_topology.py +++ b/hysop/topology/cartesian_topology.py @@ -99,14 +99,6 @@ class CartesianTopologyState(TopologyState): """Return the TranspositionState corresponding to current permutation axes.""" return TranspositionState.axes_to_tstate(self._axes) - def copy(self, axes=None, memory_order=None): - """Return of copy of this object.""" - axes = first_not_None(axes, self._axes) - dim = len(axes) - memory_order = first_not_None(memory_order, self._memory_order) - return CartesianTopologyState(dim=dim, axes=axes, - memory_order=memory_order) - def __transposed(self, vec, axes): """ Compute permutation of input vector of size len(axes) according diff --git a/hysop/topology/topology.py b/hysop/topology/topology.py index 062483f85..2a293d6d9 100644 --- a/hysop/topology/topology.py +++ b/hysop/topology/topology.py @@ -257,7 +257,7 @@ class TopologyView(TaggedObjectView, metaclass=ABCMeta): def __str__(self): """Same as self.long_description().""" - return self.long_description() + return self.long_description(self.topology) topology = property(_get_topology) topology_state = property(_get_topology_state) -- GitLab