diff --git a/ci/docker_images/ubuntu/groovy/Dockerfile b/ci/docker_images/ubuntu/groovy/Dockerfile index a4a0064a50823aab8c06d09f05b4dc9746de0195..a9083f89809d79dedff7a4bc416f8e82483496a6 100644 --- a/ci/docker_images/ubuntu/groovy/Dockerfile +++ b/ci/docker_images/ubuntu/groovy/Dockerfile @@ -36,6 +36,20 @@ RUN cd /tmp && \ ENV MPICC "${MPI_ROOT}/bin/mpicc" RUN ldconfig && pip3.8 install --upgrade mpi4py +# HPTT (CPU tensor permutation library) +RUN cd /tmp && \ + git clone https://gitlab.com/keckj/hptt.git && \ + cd hptt && \ + mkdir build && \ + cd build && \ + cmake -DCMAKE_BUILD_TYPE=Release .. && \ + make && \ + make install && \ + cd ../pythonAPI && \ + pip3.8 install --upgrade . && \ + cd /tmp && \ + rm -Rf /tmp/hptt + # HDF5 + h5py (v1.10.6 is currently the last supported by h5py) RUN cd /tmp && \ wget https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.10/hdf5-1.10.6/src/hdf5-1.10.6.tar.gz && \ @@ -120,8 +134,8 @@ RUN cd /tmp && \ cd pyopencl && \ git checkout v2020.2.2 && \ git submodule update --init && \ - python3.8 configure.py && \ - make && \ + CFLAGS='-O0 -g -UNDEBUG' python3.8 configure.py && \ + CFLAGS='-O0 -g -UNDEBUG' make && \ pip3.8 install --upgrade . && \ cd - && \ rm -Rf /tmp/pyopencl @@ -160,20 +174,6 @@ RUN cd /tmp && \ cd - && \ rm -Rf /tmp/gpyfft -# HPTT (CPU tensor permutation library) -RUN cd /tmp && \ - git clone https://gitlab.com/keckj/hptt.git && \ - cd hptt && \ - mkdir build && \ - cd build && \ - cmake -DCMAKE_BUILD_TYPE=Release .. && \ - make && \ - make install && \ - cd ../pythonAPI && \ - pip3.8 install --upgrade . && \ - cd /tmp && \ - rm -Rf /tmp/hptt - # python flint (FLINT2 + ARB + python-flint) RUN cd /tmp && \ wget https://github.com/wbhart/flint2/archive/v2.6.3.tar.gz && \ diff --git a/hysop/backend/device/autotunable_kernel.py b/hysop/backend/device/autotunable_kernel.py index 550da661ff080da8228553f635c07439c5dec2c7..9fdf35af4ab3e0eb4040adc346f7086b3d4621ca 100644 --- a/hysop/backend/device/autotunable_kernel.py +++ b/hysop/backend/device/autotunable_kernel.py @@ -296,7 +296,7 @@ class AutotunableKernel(object, metaclass=ABCMeta): check_instance(extra_parameters, dict, keys=str) check_instance(extra_kwds, dict, keys=str) - global_work_size = (work_bounds.work_size + work_load - 1) / work_load + global_work_size = (work_bounds.work_size + work_load - 1) // work_load (min_wg_size, max_wg_size) = self.compute_min_max_wg_size( work_bounds=work_bounds, work_load=work_load, global_work_size=global_work_size, @@ -625,7 +625,7 @@ class AutotunerWorkConfiguration(object): self._work_bounds = work_bounds self._work_load = work_load - self._global_work_size = (work_bounds.work_size + work_load - 1) / work_load + self._global_work_size = (work_bounds.work_size + work_load - 1) // work_load self._filters = {} self._filter_names = () diff --git a/hysop/backend/device/codegen/base/kernel_codegen.py b/hysop/backend/device/codegen/base/kernel_codegen.py index b6660730e5893e6fc0dd2c6abced0e1f70a47eaf..9518e40d3efcc59ed1611844c2a1366d04d90f8f 100644 --- a/hysop/backend/device/codegen/base/kernel_codegen.py +++ b/hysop/backend/device/codegen/base/kernel_codegen.py @@ -68,7 +68,7 @@ class KernelCodeGenerator(KernelBase, OpenClCodeGenerator): def get_global_work_size(self, work_size, local_work_size): work_size = np.asarray(work_size) local_work_size = np.asarray(local_work_size) - return ((work_size+local_work_size-1)/local_work_size) * local_work_size + return ((work_size+local_work_size-1)//local_work_size) * local_work_size def min_ghosts(self): ghosts = (0,)*self.work_dim diff --git a/hysop/backend/device/codegen/base/statistics.py b/hysop/backend/device/codegen/base/statistics.py index ad8d2d29a20a9fa0b8e2cdd62ef9164039f8d14f..bdb3deccde38a1507d4b7bd4d67b6b8198340545 100644 --- a/hysop/backend/device/codegen/base/statistics.py +++ b/hysop/backend/device/codegen/base/statistics.py @@ -32,12 +32,12 @@ def _fill_dtype_ops(): for itype,size in zip(int_base_types,ibytes): for vsize in vsizes: typename = itype + ('' if vsize==1 else str(vsize)) - dtype_ops[typename] = (vsize*float(size)/4, 'IOPS') + dtype_ops[typename] = (vsize*float(size)//4, 'IOPS') fbytes = [2,4,8] for ftype,size in zip(float_base_types,fbytes): for vsize in vsizes: typename = ftype + ('' if vsize==1 else str(vsize)) - dtype_ops[typename] = (vsize*float(size)/4, 'FLOPS') + dtype_ops[typename] = (vsize*float(size)//4, 'FLOPS') _fill_dtype_ops() @@ -150,11 +150,11 @@ class TimedWorkStatistics(WorkStatistics): return self._ops_per_category def global_mem_throughput(self): - return self.global_mem_transactions()/self.duration + return self.global_mem_transactions() / self.duration def local_mem_throughput(self): - return self.local_mem_transactions()/self.duration + return self.local_mem_transactions() / self.duration def total_mem_throughput(self): - return self.total_mem_transactions()/self.duration + return self.total_mem_transactions() / self.duration def _init(self): for dtype in self.ops_per_type: @@ -171,7 +171,7 @@ class TimedWorkStatistics(WorkStatistics): ops_per_second = {} for (op_category, op_count) in ops_count.items(): - ops_per_second[op_category] = op_count/self.duration + ops_per_second[op_category] = op_count / self.duration self._ops_per_category = ops_count self._ops_per_second = ops_per_second diff --git a/hysop/backend/device/codegen/base/test.py b/hysop/backend/device/codegen/base/test.py index 31f66b47751dcc51d34bc81ea8b269f1b89f382b..ced3eaae605fc0bf6bc3299c388a408c4d58c1db 100644 --- a/hysop/backend/device/codegen/base/test.py +++ b/hysop/backend/device/codegen/base/test.py @@ -20,7 +20,7 @@ def _test_mesh_info(name, typegen, dim, ghosts, resolution, **kargs): vsize = upper_pow2_or_3(dim) ghosts = [ghosts]*dim if np.isscalar(ghosts) else ghosts ghosts = np.asarray(ghosts)[:dim] - + resolution = [resolution]*dim if np.isscalar(resolution) else resolution resolution = np.asarray(resolution)[:dim] @@ -31,7 +31,7 @@ def _test_mesh_info(name, typegen, dim, ghosts, resolution, **kargs): compute_resolution = resolution-2*ghosts xmin = np.zeros((dim,)) - xmax = np.ones((dim,)) + xmax = np.ones((dim,)) size = xmax - xmin dx = size / (compute_resolution - 1) @@ -41,26 +41,26 @@ def _test_mesh_info(name, typegen, dim, ghosts, resolution, **kargs): mbs = MeshBaseStruct(typegen, vsize) mis = MeshInfoStruct(typegen, vsize, mbs_typedef=mbs.typedef) - # create a local numpy and a codegen MeshInfoStruct variable + # create a local numpy and a codegen MeshInfoStruct variable global_mesh = mbs.create(name='global', - resolution=resolution, compute_resolution=compute_resolution, - xmin=xmin, xmax=xmax, + resolution=resolution, compute_resolution=compute_resolution, + xmin=xmin, xmax=xmax, boundaries=boundaries, size=size, **kargs) - + xmin -= ghosts*dx xmax += ghosts*dx local_mesh = mbs.create(name='local', - resolution=resolution, compute_resolution=compute_resolution, - xmin=xmin, xmax=xmax, + resolution=resolution, compute_resolution=compute_resolution, + xmin=xmin, xmax=xmax, boundaries=boundaries, size=size, **kargs) (np_mis, cg_mis) = mis.create(name=name, - dim=dim, + dim=dim, start=start, stop=stop, ghosts=ghosts, dx=dx, @@ -69,7 +69,7 @@ def _test_mesh_info(name, typegen, dim, ghosts, resolution, **kargs): return (np_mis, cg_mis) -def make_slice_views(compute_grid_size, +def make_slice_views(compute_grid_size, lghosts=None, rghosts=None, step=None): compute_grid_size = np.asarray(compute_grid_size) dim = compute_grid_size.size @@ -79,13 +79,13 @@ def make_slice_views(compute_grid_size, elif np.isscalar(lghosts): lghosts = (lghosts,)*dim lghosts = np.asarray(lghosts) - + if (rghosts is None): rghosts = (0,)*dim elif np.isscalar(rghosts): rghosts = (rghosts,)*dim rghosts = np.asarray(rghosts) - + if (step is None): step = (1,)*dim elif np.isscalar(step): @@ -99,5 +99,5 @@ def make_slice_views(compute_grid_size, return view[::-1], grid_size, grid_shape, lghosts, rghosts, step - + diff --git a/hysop/backend/device/codegen/functions/directional_remesh.py b/hysop/backend/device/codegen/functions/directional_remesh.py index c7644226d25c3f7ac6f5bea1c43c596e4f640ea2..b234bee713ad859959762d62281515c8509f2c64 100644 --- a/hysop/backend/device/codegen/functions/directional_remesh.py +++ b/hysop/backend/device/codegen/functions/directional_remesh.py @@ -236,7 +236,7 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): dtype = tg.np_dtype(ftype) P = CodegenVariable(name='P', ctype=itype, typegen=tg, - const=True, value=1+(self.kernel.n/2)) + const=True, value=1+(self.kernel.n//2)) eps = CodegenVariable(name='eps', ctype=ftype, typegen=tg, const=True, value=np.finfo(dtype).eps) diff --git a/hysop/backend/device/codegen/functions/stretching_rhs.py b/hysop/backend/device/codegen/functions/stretching_rhs.py index ae339aaa301dfd5b0c5577b5ba9c0f3f25a47a83..17d6d6a9405e852bc0f94f32ccb39babf1b4ef3d 100644 --- a/hysop/backend/device/codegen/functions/stretching_rhs.py +++ b/hysop/backend/device/codegen/functions/stretching_rhs.py @@ -123,7 +123,7 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): def build_stencil(self,order): assert order%2==0 - h = order/2 + h = order//2 sg = StencilGenerator() sg.configure(dim=1,derivative=1,order=order) stencil = sg.generate_exact_stencil(origin=h) @@ -162,7 +162,7 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): inv_dx_var = CodegenVariable('inv_dx', ftype, typegen, add_impl_const=True, nl=True) stencil = self.build_stencil(order) - stencil.replace_symbols({stencil.dx:1/inv_dx_s}) + stencil.replace_symbols({stencil.dx:1.0/inv_dx_s}) symbol2vars = {inv_dx_s:inv_dx_var} apply_stencil = ApplyStencilFunction(typegen=typegen, @@ -236,7 +236,7 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): active = s.args['active'] dw_dt = CodegenVectorClBuiltin('dW_dt', ftype, dim, tg) - ghosts = CodegenVariable('ghosts', itype, tg, const=True, value=order/2, symbolic_mode=True) + ghosts = CodegenVariable('ghosts', itype, tg, const=True, value=order//2, symbolic_mode=True) with s._function_(): s.jumpline() diff --git a/hysop/backend/device/codegen/kernels/bandwidth.py b/hysop/backend/device/codegen/kernels/bandwidth.py index afbcd00ead28987401620ae21e3de4c7d5e90a61..89efd2cdb4288972344447eb5f937affd310b843 100644 --- a/hysop/backend/device/codegen/kernels/bandwidth.py +++ b/hysop/backend/device/codegen/kernels/bandwidth.py @@ -28,27 +28,27 @@ class BandwidthKernel(KernelCodeGenerator): vtype = vtype.replace(' ','_') name = 'bandwidth_{}_{}_{}'.format(vtype,nreads,nwrites) return name - + def __init__(self, typegen, vtype, nreads, nwrites, known_vars=None): assert nreads>0 and nwrites>0 - + kernel_args = self.gen_kernel_arguments(typegen, vtype) - + name = BandwidthKernel.codegen_name(vtype, nreads, nwrites) super(BandwidthKernel,self).__init__( name=name, typegen=typegen, - work_dim=1, + work_dim=1, kernel_args=kernel_args, known_vars=known_vars) - + self.vtype = vtype self.nreads = nreads self.nwrites = nwrites self.gencode() - + def gen_kernel_arguments(self, typegen, vtype): kargs = ArgDict() @@ -88,10 +88,10 @@ class BandwidthKernel(KernelCodeGenerator): if __name__ == '__main__': from hysop.backend.device.codegen.base.test import _test_typegen - + tg = _test_typegen('float') vtype = 'float4' - - ck = BandwidthKernel(tg, vtype, nreads=10, nwrites=1, + + ck = BandwidthKernel(tg, vtype, nreads=10, nwrites=1, known_vars={'global_size':1024, 'local_size': 512}) ck.edit() diff --git a/hysop/backend/device/codegen/kernels/copy_kernel.py b/hysop/backend/device/codegen/kernels/copy_kernel.py index 819e4576e752b736a45769177125c44a7a25fbdb..f37d8a14353913369c0cfc1202213832b675ddee 100644 --- a/hysop/backend/device/codegen/kernels/copy_kernel.py +++ b/hysop/backend/device/codegen/kernels/copy_kernel.py @@ -30,12 +30,12 @@ class CopyKernel(KernelCodeGenerator): if vectorized>1: name += '_v{}'.format(vectorized) return name - + def __init__(self, typegen, vtype, vectorized=1, src_mem='global', dst_mem='global', restrict=True, known_vars = None, force_symbolic = False): - + if vectorized==1: pass elif vectorized not in typegen.vsizes: @@ -44,18 +44,18 @@ class CopyKernel(KernelCodeGenerator): raise ValueError('Cannot vectorize vector types.') kernel_args = self.gen_kernel_arguments(typegen, vtype, src_mem, dst_mem, restrict) - + name = CopyKernel.codegen_name(src_mem, dst_mem, vtype, restrict, vectorized) super(CopyKernel,self).__init__( name=name, typegen=typegen, - work_dim=1, + work_dim=1, kernel_args=kernel_args, known_vars=known_vars, vec_type_hint=vtype, symbolic_mode=force_symbolic) - + self.vtype = vtype self.src_mem = src_mem self.dst_mem = dst_mem @@ -63,17 +63,17 @@ class CopyKernel(KernelCodeGenerator): self.vectorized = vectorized self.gencode() - + def gen_kernel_arguments(self, typegen, vtype, src_mem, dst_mem, restrict): - + ftype = typegen.basetype(vtype) components = typegen.components(vtype) is_base_type = (components==1) _src_mem = OpenClCodeGenerator.default_keywords[src_mem] _dst_mem = OpenClCodeGenerator.default_keywords[dst_mem] - + kargs = ArgDict() kargs['dst'] = CodegenVariable(ctype=vtype,name='dst',ptr=True,typegen=typegen, @@ -94,11 +94,11 @@ class CopyKernel(KernelCodeGenerator): dst = s.vars['dst'] src = s.vars['src'] count = s.vars['count'] - + with s._kernel_(): global_size.declare(s,const=True) - + if global_size.known(): s.pragma('unroll') with s._for_('int {gid}={fval}; {gid}<{N}; {gid}+={gsize}'.format(gid=global_id(), @@ -109,7 +109,7 @@ class CopyKernel(KernelCodeGenerator): vstore = 'vstore{N}({vload}, {offset}, {dst});'.format(vload=vload,N=N,offset=global_id(), dst=dst()) s.append(vstore) else: - s.append("{} = {};".format(dst[global_id()],src[global_id()])) + s.append("{} = {};".format(dst[global_id()],src[global_id()])) @staticmethod def autotune(cl_env, typegen, src, dst, vtype, count, restrict, @@ -119,12 +119,12 @@ class CopyKernel(KernelCodeGenerator): raise ValueError('cl_env is not an OpenClEnvironment.') if not isinstance(typegen, OpenClTypeGen): raise ValueError('typegen is not an OpenClTypeGen.') - + device = cl_env.device context = cl_env.ctx platform = cl_env.platform queue = cl_env.queue - + if vtype not in typegen.builtin_types: raise ValueError('{} is not an opencl bultin type.'.format(vtype)) if count < 0: @@ -145,7 +145,7 @@ class CopyKernel(KernelCodeGenerator): work_size=count min_local_size = max(1,clCharacterize.get_simd_group_size(device,1)) max_workitem_workload = 256 - + symbolic_mode = False #__KERNEL_DEBUG__ dump_src = __KERNEL_DEBUG__ @@ -153,24 +153,24 @@ class CopyKernel(KernelCodeGenerator): def kernel_generator(work_size, global_size, local_size, kernel_args, vectorized, force_verbose=False, force_debug=False, **kargs): - + ## Compile time known variables known_vars = dict( global_size=global_size[0], local_size=local_size[0], count = work_size[0] ) - + ## CodeGenerator codegen = CopyKernel(typegen=typegen, vtype=vtype, vectorized=vectorized, src_mem='global', dst_mem='global', restrict=restrict, force_symbolic=symbolic_mode, known_vars=known_vars) - + ## generate source code and build kernel src = codegen.__str__() src_hash = hashlib.sha512(src).hexdigest() - prg = cl_env.build_raw_src(src, build_opts, + prg = cl_env.build_raw_src(src, build_opts, kernel_name=codegen.name, force_verbose=force_verbose, force_debug=force_debug) kernel = prg.all_kernels()[0] @@ -182,11 +182,11 @@ class CopyKernel(KernelCodeGenerator): if work_size%vectorized!=0: raise ValueError('Invalid vector size.') - + codegen_name =CopyKernel.codegen_name('global','global',vtype,restrict,vectorized) autotuner = KernelAutotuner(name=codegen_name, work_dim=1, - build_opts=build_opts, + build_opts=build_opts, autotuner_config=autotuner_config) autotuner.add_filter('1d_shape_min', autotuner.min_workitems_per_direction) autotuner.enable_variable_workitem_workload(max_workitem_workload=max_workitem_workload) @@ -196,24 +196,24 @@ class CopyKernel(KernelCodeGenerator): 'dst':slice(1,2,1), 'src':slice(2,3,1) } - + (gwi, lwi, stats, wl) = autotuner.bench(typegen=typegen, - work_size=work_size/vectorized, kernel_args=kernel_args, + work_size=work_size//vectorized, kernel_args=kernel_args, kernel_generator=kernel_generator, dump_src=dump_src, min_local_size=min_local_size, vectorized=vectorized) (kernel, kernel_args, cached_bytes, src_hash) = kernel_generator( - work_size=[work_size/vectorized], global_size=gwi,local_size=lwi, + work_size=[work_size//vectorized], global_size=gwi,local_size=lwi, kernel_args=kernel_args, vectorized=vectorized, force_verbose=None,force_debug=None) - + kernel_launcher = OpenClKernelLauncher(kernel, queue, list(gwi), list(lwi)) return (stats, kernel_launcher, kernel_args, kernel_args_mapping, cached_bytes) - + candidates = [i for i in typegen.vsizes if work_size%i==0] - + best = None for vectorized in candidates: res = _autotune(vectorized); @@ -224,10 +224,10 @@ class CopyKernel(KernelCodeGenerator): if __name__ == '__main__': from hysop.backend.device.codegen.base.test import test_typegen - + tg = test_typegen('float','dec') vtype = 'float' - + ck = CopyKernel(tg, vtype, vectorized=16, force_symbolic=True, known_vars=dict( diff --git a/hysop/backend/device/codegen/kernels/directional_advection.py b/hysop/backend/device/codegen/kernels/directional_advection.py index eda15a1e35e7da89ec33f536dae9d269dfe366d7..f59792772cb9531346189a93f00bbdf9a615bfb5 100644 --- a/hysop/backend/device/codegen/kernels/directional_advection.py +++ b/hysop/backend/device/codegen/kernels/directional_advection.py @@ -623,7 +623,7 @@ class DirectionalAdvectionKernelGenerator(KernelCodeGenerator): s_mesh_size = position_mesh_info['global_mesh']['compute_resolution'].value[0] v_mesh_size = velocity_mesh_info['global_mesh']['compute_resolution'].value[0] if (s_mesh_size%v_mesh_size==0): - mesh_ratio = s_mesh_size / v_mesh_size + mesh_ratio = s_mesh_size // v_mesh_size with s._if_('k%{}==0'.format(mesh_ratio)): s.append('{} = convert_int_rtn(convert_{}({}+1)*{}*{});'.format(line_offset_for_v, ftype, line_offset, dx[0], v_inv_dx[0])) with s._for_('int {i}={fval}; {i}<{N} && ({i}+{o})<{Nv}; {i}+={gsize}'.format( diff --git a/hysop/backend/device/codegen/kernels/directional_remesh.py b/hysop/backend/device/codegen/kernels/directional_remesh.py index 01d4172ac8e4f19e09e9686b2e2e5011ad8b9232..90f6108a56e9022ffada4211e24691a7d4d03bb2 100644 --- a/hysop/backend/device/codegen/kernels/directional_remesh.py +++ b/hysop/backend/device/codegen/kernels/directional_remesh.py @@ -58,7 +58,7 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): assert remesh_kernel.n >= 1, 'Bad remeshing kernel.' if remesh_kernel.n > 1: assert remesh_kernel.n % 2 == 0, 'Odd remeshing kernel moments.' - min_ghosts = int(1+npw.floor(scalar_cfl)+remesh_kernel.n/2) + min_ghosts = int(1+npw.floor(scalar_cfl)+remesh_kernel.n//2) return min_ghosts @classmethod @@ -74,7 +74,7 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): work_load[0] = nparticles global_size = work_size.copy() - global_size = ((global_size+work_load-1)/work_load) + global_size = ((global_size+work_load-1)//work_load) return global_size def get_global_size(self, work_size, local_work_size, work_load=None): @@ -99,7 +99,7 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): 'local_work_size mismatch!' max_global_size = self.get_max_global_size(work_size, work_load, nparticles) - global_size = ((max_global_size+local_work_size-1)/local_work_size) * local_work_size + global_size = ((max_global_size+local_work_size-1)//local_work_size) * local_work_size global_size[0] = local_work_size[0] return global_size diff --git a/hysop/backend/device/codegen/kernels/directional_stretching.py b/hysop/backend/device/codegen/kernels/directional_stretching.py index 8355bf6259403fa601ec00c40be332825c8d585d..54e6c730b9c8fb8ca2b4ce73c46af580ab23eba0 100644 --- a/hysop/backend/device/codegen/kernels/directional_stretching.py +++ b/hysop/backend/device/codegen/kernels/directional_stretching.py @@ -131,7 +131,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): pass elif lboundary in [BoundaryCondition.NONE, BoundaryCondition.PERIODIC]: assert order%2==0 - stencil_ghost = order/2 + stencil_ghost = order//2 if formulation == StretchingFormulation.CONSERVATIVE: u_ghosts[direction] = time_integrator.stages * stencil_ghost else: @@ -149,7 +149,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): @staticmethod def min_wg_size(formulation, order, time_integrator): ghosts = [1]*3 - stencil_ghost = order/2 + stencil_ghost = order//2 if formulation == StretchingFormulation.CONSERVATIVE: ghosts[0] = time_integrator.stages * stencil_ghost else: @@ -166,7 +166,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): assert work_load[0] == 1 global_size = work_size.copy() - global_size = ((global_size+work_load-1)/work_load) + global_size = ((global_size+work_load-1)//work_load) return global_size #return global_work_size from effective work_size and given local_work_size @@ -193,7 +193,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): max_global_size = self.get_max_global_size(work_size, work_load) max_global_size[0] = local_work_size[0] - global_size = ((max_global_size+local_work_size-1)/local_work_size) * local_work_size + global_size = ((max_global_size+local_work_size-1)//local_work_size) * local_work_size return global_size @@ -233,7 +233,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): return (sc,dc,tc) def _cache_ghosts(self): - stencil_ghost = self.order/2 + stencil_ghost = self.order//2 if self.is_conservative: return self.time_integrator.stages * stencil_ghost else: diff --git a/hysop/backend/device/codegen/kernels/tests/test_codegen_directional_remesh.py b/hysop/backend/device/codegen/kernels/tests/test_codegen_directional_remesh.py index 729179140b34ebceb864974c791052a3dea4898e..5bdf6abeed07de3e5046c8cc73aa7e21096c0054 100644 --- a/hysop/backend/device/codegen/kernels/tests/test_codegen_directional_remesh.py +++ b/hysop/backend/device/codegen/kernels/tests/test_codegen_directional_remesh.py @@ -222,7 +222,7 @@ Allocating fields: print('\n'+'\n\n'.join(str(x) for x in all_fields['with_ghosts'].values())) Lx = min(typegen.device.max_work_item_sizes[0], typegen.device.max_work_group_size) - Lx = min(Lx, self.compute_grid_size[0]/2) + Lx = min(Lx, self.compute_grid_size[0]//2) local_work_size = npw.asarray([Lx,1,1]) work_load = npw.asarray([1,1,3]) @@ -339,7 +339,7 @@ Allocating fields: print('_do_remesh_on_cpu(rk_scheme={}, boundary={}, criteria_eps={},\n\t\t kernel={})'.format( rk_scheme, boundary, remesh_criteria_eps, kernel)) - P = 1 + kernel.n/2 + P = 1 + kernel.n//2 assert (boundary == BoundaryCondition.NONE) assert (kernel.n == self.kernel_moments) assert (remesh_criteria_eps is None) or (remesh_criteria_eps>0) diff --git a/hysop/backend/device/codegen/kernels/tests/test_codegen_directional_stretching.py b/hysop/backend/device/codegen/kernels/tests/test_codegen_directional_stretching.py index f089ea462cc5bf0f931fa346429a7f9aae96685b..1780cb3dec7b69400b0ba84eb46216cf0b4afe6f 100644 --- a/hysop/backend/device/codegen/kernels/tests/test_codegen_directional_stretching.py +++ b/hysop/backend/device/codegen/kernels/tests/test_codegen_directional_stretching.py @@ -172,8 +172,8 @@ class TestDirectionalStretching(object): def deriv(field): res = np.zeros_like(field) - for i in range(-order/2,order/2+1,1): - res += stencil[i+order/2]*np.roll(field,-i,axis=2) + for i in range(-order//2,order//2+1,1): + res += stencil[i+order//2]*np.roll(field,-i,axis=2) return res*self.inv_dx[0] host_init_buffers = self.host_buffers_init[target] diff --git a/hysop/backend/device/codegen/kernels/tests/test_codegen_transpose.py b/hysop/backend/device/codegen/kernels/tests/test_codegen_transpose.py index 1a328dfa981f557dc5815ac7205819d7ced981e0..5d6444b4bf69c9cda7ded6957ecc4b51d81b3e3e 100644 --- a/hysop/backend/device/codegen/kernels/tests/test_codegen_transpose.py +++ b/hysop/backend/device/codegen/kernels/tests/test_codegen_transpose.py @@ -252,7 +252,7 @@ class TestTranspose(object): for i in range(work_dim-1,-1,-1): if local_work_size[i] > 1: break - local_work_size[i] /= 2 + local_work_size[i] //= 2 work_load = [1]*work_dim work_size = dak.shape_to_worksize(self.grid_size, tile_size, @@ -277,8 +277,8 @@ class TestTranspose(object): print(' *tile={} vec={} worksize={} L={} WL={} G={} num_groups={} num_blocks={}'\ .format(tile_size, vectorization, work_size, local_work_size, work_load, global_work_size, - (global_work_size+local_work_size-1)/local_work_size, - (tile_grid_size+tile_size-1)/tile_size)) + (global_work_size+local_work_size-1)//local_work_size, + (tile_grid_size+tile_size-1)//tile_size)) if is_inplace: variables = ['Tin'] diff --git a/hysop/backend/device/codegen/kernels/transpose.py b/hysop/backend/device/codegen/kernels/transpose.py index 036205cb6dd09ab31eff80ccb6d7f29b5b79cd0e..cdc6d0abcf98e90371ba29eca4709a925e1c8222 100644 --- a/hysop/backend/device/codegen/kernels/transpose.py +++ b/hysop/backend/device/codegen/kernels/transpose.py @@ -79,7 +79,7 @@ class TransposeKernelGenerator(KernelCodeGenerator): j=0 for i,Si in enumerate(shape): if i==0: - max_local_worksize[j] = (tile_size+vectorization-1) / vectorization + max_local_worksize[j] = (tile_size+vectorization-1) // vectorization elif i in tile_indexes: max_local_worksize[j] = tile_size elif i < (wdim - int(contiguous_permutation and tile_indexes[1]>wdim-1)): @@ -114,13 +114,13 @@ class TransposeKernelGenerator(KernelCodeGenerator): for i,Si in enumerate(shape): if i==0: wl = work_load[j] - ngroups[j] = (Si+vts*wl-1)/(vts*wl) + ngroups[j] = (Si+vts*wl-1)//(vts*wl) elif i in tile_indexes: wl = work_load[j] - ngroups[j] = ((Si+ts*wl-1)/(ts*wl)) + ngroups[j] = ((Si+ts*wl-1)//(ts*wl)) elif i < (wdim - int(contiguous_permutation and tile_indexes[1]>wdim-1)): wl = work_load[j] - ngroups[j] = (Si+wl-1)/wl + ngroups[j] = (Si+wl-1)//wl else: continue j+=1 diff --git a/hysop/backend/device/codegen/structs/mesh_info.py b/hysop/backend/device/codegen/structs/mesh_info.py index 87cb9e82117a2743cc5e71673aab5d251d098703..2b30b6b7b1af4e5fd67bed365c6db22e404ffbb4 100644 --- a/hysop/backend/device/codegen/structs/mesh_info.py +++ b/hysop/backend/device/codegen/structs/mesh_info.py @@ -20,20 +20,20 @@ class MeshBaseStruct(StructCodeGenerator): typedef = name+'_s' else: typedef = None - super(MeshBaseStruct,self).__init__(name=name, - dtype=dtype, + super(MeshBaseStruct,self).__init__(name=name, + dtype=dtype, typegen=typegen, - typedef=typedef, + typedef=typedef, comments=comments) self.reqs['boundary_enum'] = BoundaryConditionEnum self.vsize = vsize - + @staticmethod def build_dtype(typegen, vsize): tg = typegen intn = tg.dtype_from_str('int{}'.format(vsize)) floatn = tg.dtype_from_str('fbtype{}'.format(vsize)) - + dtype = [] for fn in ['resolution','compute_resolution']: field = (fn, intn) @@ -44,7 +44,7 @@ class MeshBaseStruct(StructCodeGenerator): dtype.append(('lboundary', intn)) dtype.append(('rboundary', intn)) - + comments = [ "Resolution of the mesh -including- ghosts", "Resolution of the mesh -excluding- ghosts", @@ -56,27 +56,27 @@ class MeshBaseStruct(StructCodeGenerator): ] return np.dtype(dtype), comments - - - def create(self, name, + + + def create(self, name, resolution, compute_resolution, boundaries, xmin, xmax, size, **kargs): vsize = self.vsize dtype = self.dtype - + def extend(var, d=0): return np.asarray(tuple(var)+(d,)*(vsize-len(var))) tg = self.typegen - + lboundary, rboundary = boundaries lboundary = extend(lboundary, BoundaryCondition.NONE) rboundary = extend(rboundary, BoundaryCondition.NONE) _lboundary = [ bd() for bd in lboundary ] _rboundary = [ bd() for bd in rboundary ] - + mesh_base_vals = { 'resolution': tg.make_intn(resolution,vsize), 'compute_resolution': tg.make_intn(compute_resolution,vsize), @@ -86,7 +86,7 @@ class MeshBaseStruct(StructCodeGenerator): 'xmax': tg.make_floatn(xmax,vsize), 'size': tg.make_floatn(size,vsize) } - + var = np.empty(shape=(1,), dtype=dtype) for k in mesh_base_vals: var[k] = mesh_base_vals[k] @@ -111,7 +111,7 @@ class MeshBaseStruct(StructCodeGenerator): class MeshInfoStruct(StructCodeGenerator): def __init__(self, typegen, vsize, - typedef=True, + typedef=True, mbs_typedef=None): assert vsize in [1,2,3,4,8,16] name = '{}MeshInfo{}D'.format(typegen.fbtype[0], vsize) @@ -124,10 +124,10 @@ class MeshInfoStruct(StructCodeGenerator): typedef = None dtype,comments,ctype_overrides, reqs = MeshInfoStruct.build_dtype(typegen, vsize, mbs_typedef=mbs_typedef) super(MeshInfoStruct,self).__init__(name=name, dtype=dtype, typegen=typegen, - typedef=typedef, + typedef=typedef, comments=comments, ctype_overrides=ctype_overrides) - + for req in reqs: self.require(req.name, req) self.mesh_base = reqs[0] @@ -142,8 +142,8 @@ class MeshInfoStruct(StructCodeGenerator): i=0 dtypes = [] - def _append(dtype): - dtypes.append(dtype) + def _append(dtype): + dtypes.append(dtype) _append(('dim',np.int32) ) i+=1 @@ -161,21 +161,21 @@ class MeshInfoStruct(StructCodeGenerator): comments = [ "Dimension of the mesh", - "Index of the first local compute point in the global grid", - "Index of the last local compute point in the global grid", + "Index of the first local compute point in the global grid", + "Index of the last local compute point in the global grid", "Number of ghosts in each direction", "Space discretization", "1/dx", "Local mesh", "Global mesh", ] - + return dtypes, comments, None, [mesh_base] def create(self, name, dim, start, stop, ghosts, dx, local_mesh, global_mesh, **kargs): - + vsize = self.vsize if dim>vsize: msg='Dim should be less or equal to {}, got dim={}.'.format(vsize, dim) @@ -185,8 +185,8 @@ class MeshInfoStruct(StructCodeGenerator): vsize = self.vsize dtype = self.dtype - - dx = np.asarray(dx) + + dx = np.asarray(dx) mesh_info_vals = { 'dim' : tg.make_intn(dim,1), 'start' : tg.make_intn(start, vsize), @@ -197,7 +197,7 @@ class MeshInfoStruct(StructCodeGenerator): 'local_mesh' : local_mesh[0], 'global_mesh': global_mesh[0] } - + def extend(var,d=0): if isinstance(var, np.ndarray): _var = np.full(shape=(vsize,), fill_value=d, dtype=var.dtype) @@ -228,19 +228,19 @@ class MeshInfoStruct(StructCodeGenerator): for k in mesh_info_vals: var[k] = mesh_info_vals[k] return (var, cg_var) - + @staticmethod def create_from_mesh(name, typegen, mesh, **kargs): from hysop.mesh.cartesian_mesh import CartesianMeshView check_instance(name, str) - check_instance(mesh, CartesianMeshView) + check_instance(mesh, CartesianMeshView) check_instance(typegen, OpenClTypeGen) - + tg = typegen - + dim = mesh.dim vsize = upper_pow2_or_3(dim) - + btd = '{}MeshBase{}D_s'.format(tg.fbtype[0], vsize) itd = '{}MeshInfo{}D_s'.format(tg.fbtype[0], vsize) mesh_base = MeshBaseStruct(tg, vsize, typedef=btd) @@ -257,7 +257,7 @@ class MeshInfoStruct(StructCodeGenerator): gxmax = mesh.global_end[::-1] gsize = mesh.global_length[::-1] gboundaries = mesh.global_boundaries[::-1] - + lresolution = mesh.local_resolution[::-1] lcompute_resolution = mesh.compute_resolution[::-1] lxmin = mesh.local_origin[::-1] @@ -278,7 +278,7 @@ class MeshInfoStruct(StructCodeGenerator): dx, lmesh, gmesh, **kargs) return (var, cg_var) - + def build_codegen_variable(self,name,var_overrides=None,**kargs): tg = self.typegen if var_overrides is None: @@ -297,17 +297,17 @@ if __name__ == '__main__': from hysop.backend.device.codegen.base.test import _test_typegen tg = _test_typegen('double', float_dump_mode='dec') - + vsize = 4 mbs = MeshBaseStruct(tg, typedef='{}MeshBase{}D_s'.format(tg.fbtype[0], vsize), vsize=vsize) mis = MeshInfoStruct(tg, typedef='{}MeshInfo{}D_s'.format(tg.fbtype[0], vsize), mbs_typedef=mbs.typedef, vsize=vsize) cg = OpenClCodeGenerator('test_generator',tg) - + # declare mesh MeshInfoStruct and its dependancies (MeshBaseStruct,MeshDirectionEnum,TranspositionStateEnum) cg.require('mis',mis) - - # create a local numpy and a codegen MeshInfoStruct variable + + # create a local numpy and a codegen MeshInfoStruct variable local_mesh = mbs.create('local', (10,10,10,), (8,8,8,), ((BoundaryCondition.PERIODIC,)*3,)*2, (0,0,0,), (1,1,1,), @@ -317,11 +317,11 @@ if __name__ == '__main__': (100,100,100,), (80,80,80,), ((BoundaryCondition.PERIODIC,)*3,)*2, (0,0,0,), (10,10,10,), (10,10,10,)) - - (np_mis, cg_mis) = mis.create('mesh_info', 3, + + (np_mis, cg_mis) = mis.create('mesh_info', 3, (0,0,0,), (1024,1024,1024,), (1,1,1,), (0.1,0.2,0.3), - local_mesh, global_mesh, + local_mesh, global_mesh, storage='__constant') # declare and intialize the nested struct in the __constant address space diff --git a/hysop/backend/device/kernel_statistics.py b/hysop/backend/device/kernel_statistics.py index 47072398d28e32835438fc18ace297c1274121c9..4052fcde25e5938c379bfe4ddd853ea7b4261f2d 100644 --- a/hysop/backend/device/kernel_statistics.py +++ b/hysop/backend/device/kernel_statistics.py @@ -31,6 +31,7 @@ class KernelStatistics(object): def _get_mean(self): assert (self._nruns>0) return self._total / float(self._nruns) + def _get_total(self): return self._total def _get_nruns(self): diff --git a/hysop/backend/device/opencl/autotunable_kernels/custom_symbolic.py b/hysop/backend/device/opencl/autotunable_kernels/custom_symbolic.py index 5e3ac78d638dfe42fa86254293fabcb0829a5b36..56208423448c5c711260929c8ce065388826249d 100644 --- a/hysop/backend/device/opencl/autotunable_kernels/custom_symbolic.py +++ b/hysop/backend/device/opencl/autotunable_kernels/custom_symbolic.py @@ -398,7 +398,7 @@ class OpenClAutotunableCustomSymbolicKernel(OpenClAutotunableKernel): def compute_global_work_size(self, work, local_work_size, extra_parameters, extra_kwds): # contiguous axe is iterated fully by one work_group - global_work_size = ((work.global_work_size+local_work_size-1)/local_work_size)*local_work_size + global_work_size = ((work.global_work_size+local_work_size-1)//local_work_size)*local_work_size global_work_size[0] = local_work_size[0] return global_work_size diff --git a/hysop/backend/device/opencl/autotunable_kernels/transpose.py b/hysop/backend/device/opencl/autotunable_kernels/transpose.py index 1dbd3fdb78a802c946817ba22653ef3b891f0d63..b39d0742f8b0e1c19a2d1fe9846e567b959a61a5 100644 --- a/hysop/backend/device/opencl/autotunable_kernels/transpose.py +++ b/hysop/backend/device/opencl/autotunable_kernels/transpose.py @@ -20,9 +20,9 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): nbytes = dtype.itemsize factor = 2.0 if is_inplace else 1.0 max_cache_elems = int(self.usable_cache_bytes_per_wg / (factor*nbytes)) - + if len(tile_indexes)==2: - x = int(npw.sqrt(max_cache_elems)) + x = int(npw.sqrt(max_cache_elems)) #while x*(x+1) > max_cache_elems: #x-=1 # tile offsetting will just trigger the usual cache exception @@ -30,15 +30,15 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): else: # no cache is used max_ts_cache = npw.inf - + tile_shape = shape[tile_indexes] max_ts_shape = max(tile_shape) - + max_tile_size = min(max_ts_cache, max_ts_shape) return max_tile_size - def autotune(self, is_inplace, - input_buffer, output_buffer, + def autotune(self, is_inplace, + input_buffer, output_buffer, axes, hardcode_arrays, name=None, **kwds): """Autotune this kernel with specified axes, inputs and outputs.""" @@ -51,19 +51,19 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): assert input_buffer.ndim == output_buffer.ndim assert input_buffer.size == output_buffer.size assert input_buffer.dtype == output_buffer.dtype - + dim = input_buffer.ndim size = input_buffer.size shape = npw.asintarray(input_buffer.shape[::-1]) dtype = input_buffer.dtype ctype = clTools.dtype_to_ctype(dtype) - + # check if the permutation is valid assert dim>=2 assert len(axes)==dim assert set(axes)==set(range(dim)) assert axes != tuple(range(dim)) - + # check if is_inplace is allowed if is_inplace: can_compute_inplace = (dim == 2) @@ -72,12 +72,12 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): if not can_compute_inplace: raise ValueError(msg) assert (input_buffer.data == output_buffer.data) - + # get vector size for strides make_offset, offset_dtype = self.make_array_offset() - make_strides, strides_dtype = self.make_array_strides(dim, + make_strides, strides_dtype = self.make_array_strides(dim, hardcode_arrays=hardcode_arrays) - + kernel_args = {} known_args = {} isolation_params = {} @@ -87,62 +87,62 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): kernel_args['inout_base'] = output_buffer.base_data target_args['inout_strides'] = make_strides(output_buffer.strides, output_buffer.dtype) target_args['inout_offset'] = make_offset(output_buffer.offset, output_buffer.dtype) - isolation_params['inout_base'] = dict(count=output_buffer.size, + isolation_params['inout_base'] = dict(count=output_buffer.size, dtype=output_buffer.dtype, range=slice(output_buffer.size)) else: kernel_args['in_base'] = input_buffer.base_data target_args['in_strides'] = make_strides(input_buffer.strides, input_buffer.dtype) target_args['in_offset'] = make_offset(input_buffer.offset, input_buffer.dtype) - isolation_params['in_base'] = dict(count=input_buffer.size, dtype=input_buffer.dtype, + isolation_params['in_base'] = dict(count=input_buffer.size, dtype=input_buffer.dtype, range=slice(input_buffer.size)) kernel_args['out_base'] = output_buffer.base_data target_args['out_strides'] = make_strides(output_buffer.strides, output_buffer.dtype) target_args['out_offset'] = make_offset(output_buffer.offset, output_buffer.dtype) - isolation_params['out_base'] = dict(count=output_buffer.size, + isolation_params['out_base'] = dict(count=output_buffer.size, dtype=output_buffer.dtype, fill=0) - + if (name is None): name = 'transpose_{}_[{}]_{}'.format(ctype, ','.join(str(a) for a in axes), 'inplace' if is_inplace else 'out_of_place') - - # if last axe (contiguous axe) is permuted, + + # if last axe (contiguous axe) is permuted, # we need to use 2D tiles else we only need 1D tiles. (last_axe_permuted, work_dim, work_shape, tile_indices) = \ - TransposeKernelGenerator.characterize_permutation(shape, axes, + TransposeKernelGenerator.characterize_permutation(shape, axes, self.max_device_work_dim()) - + # keyword arguments will be agregated into extra_kwds dictionnary - return super(OpenClAutotunableTransposeKernel, self).autotune(name=name, - kernel_args=kernel_args, + return super(OpenClAutotunableTransposeKernel, self).autotune(name=name, + kernel_args=kernel_args, known_args=known_args, hardcode_arrays=hardcode_arrays, offset_dtype=offset_dtype, strides_dtype=strides_dtype, - axes=axes, - dtype=dtype, + axes=axes, + dtype=dtype, ctype=ctype, shape=shape, - tile_indices=tile_indices, + tile_indices=tile_indices, work_dim=work_dim, work_size=work_shape, is_inplace=is_inplace, isolation_params=isolation_params, last_axe_permuted=last_axe_permuted, **kwds) - def compute_parameters(self, extra_kwds): + def compute_parameters(self, extra_kwds): """Register extra parameters to optimize.""" check_instance(extra_kwds, dict, keys=str) params = super(OpenClAutotunableTransposeKernel, self).compute_parameters( extra_kwds=extra_kwds) - ## Register extra parameters + ## Register extra parameters # compute max tile fize from device cache tile_indices = extra_kwds['tile_indices'] dtype = extra_kwds['dtype'] shape = extra_kwds['shape'] is_inplace = extra_kwds['is_inplace'] last_axe_permuted = extra_kwds['last_axe_permuted'] - + flag = self.autotuner_config.autotuner_flag vectorization = (1,) use_diagonal_coordinates = (False,) @@ -158,8 +158,8 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): tile_sizes = (max_tile_size,) + tuple(sorted(tile_sizes, reverse=True)) tile_sizes = tuple(filter(lambda x: (x>=max_tile_size//8) and (x<=max_tile_size), tile_sizes)) - - params.register_extra_parameter('vectorization', vectorization) + + params.register_extra_parameter('vectorization', vectorization) params.register_extra_parameter('use_diagonal_coordinates', use_diagonal_coordinates) params.register_extra_parameter('tile_padding', tile_padding) params.register_extra_parameter('tile_size', tile_sizes) @@ -168,21 +168,21 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): def compute_work_candidates(self, work_bounds, work_load, extra_parameters, extra_kwds): """ - Configure work (global_size, local_size candidates) given a + Configure work (global_size, local_size candidates) given a OpenClWorkBoundsConfiguration object and a work_load. Return a WorkConfiguration object. - + Notes ----- global_work_size can be ignored if it depends on local_work_size and will be set in self.compute_global_work_size(). """ work = super(OpenClAutotunableTransposeKernel, self).compute_work_candidates( - work_bounds=work_bounds, work_load=work_load, - extra_parameters=extra_parameters, + work_bounds=work_bounds, work_load=work_load, + extra_parameters=extra_parameters, extra_kwds=extra_kwds) - + axes = extra_kwds['axes'] work_dim = extra_kwds['work_dim'] shape = extra_kwds['shape'] @@ -192,7 +192,7 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): max_tile_work_size = TransposeKernelGenerator.max_local_worksize(axes=axes, shape=shape, work_dim=work_dim, tile_size=tile_size, vectorization=vectorization) - work.push_filter('max_tile_worksize', work.max_wi_sizes_filter, + work.push_filter('max_tile_worksize', work.max_wi_sizes_filter, max_work_item_sizes=max_tile_work_size) return work @@ -209,13 +209,13 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): return gs def generate_kernel_src(self, global_work_size, local_work_size, - extra_parameters, extra_kwds, tuning_mode, dry_run, + extra_parameters, extra_kwds, tuning_mode, dry_run, force_verbose=False, force_debug=False, return_codegen = False): """ Generate kernel name and source code. """ - + ## Extract usefull variables axes = extra_kwds['axes'] ctype = extra_kwds['ctype'] @@ -224,26 +224,26 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): ## Get compile time OpenCL known variables known_vars = super(OpenClAutotunableTransposeKernel, self).generate_kernel_src( - global_work_size=global_work_size, - local_work_size=local_work_size, - extra_parameters=extra_parameters, + global_work_size=global_work_size, + local_work_size=local_work_size, + extra_parameters=extra_parameters, extra_kwds=extra_kwds, tuning_mode=tuning_mode, dry_run=dry_run) known_vars.update(known_args) known_vars['shape'] = self.to_vecn(extra_kwds['shape'], 0) - + ## Generate OpenCL source code - codegen = TransposeKernelGenerator(axes=axes, + codegen = TransposeKernelGenerator(axes=axes, typegen=self.typegen, ctype=ctype, is_inplace=is_inplace, symbolic_mode=self.symbolic_mode, known_vars=known_vars, debug_mode=force_debug, tuning_mode=tuning_mode, **extra_parameters) - + kernel_name = codegen.name kernel_src = str(codegen) ## Check if cache would fit self.check_cache(codegen.required_workgroup_cache_size()[2]) - + return (kernel_name, kernel_src) def compute_args_mapping(self, extra_kwds, extra_parameters): @@ -252,25 +252,25 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): strides_dtype = extra_kwds['strides_dtype'] hardcode_arrays = extra_kwds['hardcode_arrays'] if extra_kwds['is_inplace']: - args_mapping['inout_base'] = (0, cl.MemoryObjectHolder) + args_mapping['inout_base'] = (0, cl.MemoryObjectHolder) if not hardcode_arrays: - args_mapping['inout_strides'] = (1, strides_dtype) + args_mapping['inout_strides'] = (1, strides_dtype) args_mapping['inout_offset'] = (2, offset_dtype) else: - args_mapping['in_base'] = (0, cl.MemoryObjectHolder) + args_mapping['in_base'] = (0, cl.MemoryObjectHolder) if not hardcode_arrays: - args_mapping['in_strides'] = (1, strides_dtype) + args_mapping['in_strides'] = (1, strides_dtype) args_mapping['in_offset'] = (2, offset_dtype) - args_mapping['out_base'] = (1 + 2*(not hardcode_arrays), cl.MemoryObjectHolder) + args_mapping['out_base'] = (1 + 2*(not hardcode_arrays), cl.MemoryObjectHolder) if not hardcode_arrays: - args_mapping['out_strides'] = (4, strides_dtype) + args_mapping['out_strides'] = (4, strides_dtype) args_mapping['out_offset'] = (5, offset_dtype) return args_mapping def hash_extra_kwds(self, extra_kwds): """Hash extra_kwds dictionnary for caching purposes.""" - return self.custom_hash(extra_kwds['ctype'], - extra_kwds['axes'], + return self.custom_hash(extra_kwds['ctype'], + extra_kwds['axes'], extra_kwds['shape'], extra_kwds['is_inplace'], extra_kwds['known_args']) diff --git a/hysop/backend/device/opencl/opencl_autotunable_kernel.py b/hysop/backend/device/opencl/opencl_autotunable_kernel.py index 8c0f8046ba79a230925e186423f0a6d12379bd10..d25c49df9964f5e3aa33ba1d85268ab03e24a37e 100644 --- a/hysop/backend/device/opencl/opencl_autotunable_kernel.py +++ b/hysop/backend/device/opencl/opencl_autotunable_kernel.py @@ -62,7 +62,7 @@ class OpenClAutotunableKernel(AutotunableKernel): check_instance(extra_parameters, dict, keys=str) check_instance(extra_kwds, dict, keys=str) global_work_size = work.global_work_size - return ((global_work_size+local_work_size-1)/local_work_size)*local_work_size + return ((global_work_size+local_work_size-1)//local_work_size)*local_work_size @abstractmethod def generate_kernel_src(self, global_work_size, local_work_size, diff --git a/hysop/backend/device/opencl/opencl_device.py b/hysop/backend/device/opencl/opencl_device.py index aea54a9e851edfdf7ebbaa02633be681a62f8fb6..c38686647682909d889b049a870cc3bcf9e63f24 100644 --- a/hysop/backend/device/opencl/opencl_device.py +++ b/hysop/backend/device/opencl/opencl_device.py @@ -288,7 +288,7 @@ bytes2str(self.local_mem_size(), decimal=False), flops2str(info.max_dp_compute))] failed = False if (info.has_single_precision_compute and info.has_double_precision_compute): - ratio = info.max_dp_compute/info.max_sp_compute + ratio = info.max_dp_compute / float(info.max_sp_compute) ratio = fractions.Fraction(ratio).limit_denominator(64) ss += ['{{s}}double to float compute ratio: {} (DP/SP)'.format(ratio)] failed = False diff --git a/hysop/backend/device/opencl/opencl_tools.py b/hysop/backend/device/opencl/opencl_tools.py index 5114a1a3cb606af3330ae7227077690d37249a74..e0dc4de79bd041b3cfe8b0ab817915912efb8dc2 100644 --- a/hysop/backend/device/opencl/opencl_tools.py +++ b/hysop/backend/device/opencl/opencl_tools.py @@ -237,7 +237,7 @@ def get_work_items(resolution, vector_width=1): print("Warning : GPU best performances obtained for",) print("problem sizes multiples of 256") while resolution[0] % workItemNumber > 0: - workItemNumber = workItemNumber / 2 + workItemNumber = workItemNumber // 2 # Change work-item regarding vector_width if workItemNumber * vector_width > resolution[0]: if resolution[0] % vector_width > 0: diff --git a/hysop/backend/device/opencl/operator/spatial_filtering.py b/hysop/backend/device/opencl/operator/spatial_filtering.py index d08fa406a8dd965a5bf272764cb3575e9e83ae23..ffffc9afcaf7962d62e9c3e552a7f9be84acc691 100644 --- a/hysop/backend/device/opencl/operator/spatial_filtering.py +++ b/hysop/backend/device/opencl/operator/spatial_filtering.py @@ -85,7 +85,7 @@ class OpenClPolynomialRestrictionFilter(PolynomialRestrictionFilterBase, OpenClO assert self.subgrid_restrictor.gr == gr ekg = self.elementwise_kernel_generator - Rr = self.subgrid_restrictor.Rr / self.subgrid_restrictor.GR + Rr = self.subgrid_restrictor.Rr / self.subgrid_restrictor.gr ghosts = np.asarray(self.subgrid_restrictor.ghosts) I = np.asarray(local_indices_symbols[:dim][::-1]) diff --git a/hysop/backend/hardware/hwinfo.py b/hysop/backend/hardware/hwinfo.py index 5510a00d87c11b46c202a95d3f2175516cffe451..19080abab7298a486d46c31c9a903fffaad0ec96 100644 --- a/hysop/backend/hardware/hwinfo.py +++ b/hysop/backend/hardware/hwinfo.py @@ -85,7 +85,7 @@ class TopologyObject(object, metaclass=ABCMeta): mask_length = self._processing_units_count() _cpuset = '|{0:0{length}b}|'.format(cpuset, length=mask_length) _cpuset = _cpuset.replace('0','.').replace('1', 'x') - _cpuset += ' 0x{0:0{length}x}'.format(cpuset, length=mask_length/4) + _cpuset += ' 0x{0:0{length}x}'.format(cpuset, length=mask_length//4) return _cpuset def all_cpu_set(self): diff --git a/hysop/backend/hardware/machine.py b/hysop/backend/hardware/machine.py index f41ca8bd8009e75bfa736b2200c566df8964b8f0..095ddb20b2f01703c3614ab59ecb4693334a7f99 100644 --- a/hysop/backend/hardware/machine.py +++ b/hysop/backend/hardware/machine.py @@ -39,7 +39,7 @@ class NumaNode(TopologyObject): mask_length = int(math.ceil(math.log(self.parent.full_node_set(),2))) _nodeset = '|{0:0{length}b}|'.format(nodeset, length=mask_length) _nodeset = _nodeset.replace('0','.').replace('1', 'x') - _nodeset += ' 0x{0:0{length}x}'.format(nodeset, length=mask_length/4) + _nodeset += ' 0x{0:0{length}x}'.format(nodeset, length=mask_length//4) return _nodeset diff --git a/hysop/backend/hardware/pci.py b/hysop/backend/hardware/pci.py index bcc06f07d45528bdc23d1eda1837c6d01c940a58..483c7cc8738df4589ae62dd6eb1d484b27c5cf11 100644 --- a/hysop/backend/hardware/pci.py +++ b/hysop/backend/hardware/pci.py @@ -86,7 +86,7 @@ class OperatingSystemDevice(TopologyObject): 'multi_processors': multi_processors, 'cores_per_mp': cores_per_mp, 'cores': cores_per_mp*multi_processors, - 'global_memory_size': ', '.join(tuple(bytes2str(int(mem)*1024*1024/1000) for mem in global_mem)), + 'global_memory_size': ', '.join(tuple(bytes2str(int(mem)*1024*1024//1000) for mem in global_mem)), 'shared_memory_size_per_mp': ', '.join(tuple(bytes2str(int(mem)*1000) for mem in shared_mem_per_mp)), 'l2_cache_size': ', '.join(tuple(bytes2str(int(mem)*1000) for mem in l2_cache_size)), } diff --git a/hysop/backend/host/python/operator/directional/advection_dir.py b/hysop/backend/host/python/operator/directional/advection_dir.py index ff610fa984981972e19b1ecfb1ce26d737ee4d25..fb80b950f137b413c1838508f021a2c305139ce1 100644 --- a/hysop/backend/host/python/operator/directional/advection_dir.py +++ b/hysop/backend/host/python/operator/directional/advection_dir.py @@ -335,7 +335,7 @@ class PythonDirectionalAdvection(DirectionalAdvectionBase, HostDirectionalOperat scalar_advection_ghosts = self.scalar_advection_ghosts remesh_ghosts = self.remesh_ghosts remesh_kernel = self.remesh_kernel - P = 1 + remesh_kernel.n/2 + P = 1 + remesh_kernel.n//2 is_periodic = self.is_periodic diff --git a/hysop/backend/host/python/operator/flowrate_correction.py b/hysop/backend/host/python/operator/flowrate_correction.py index 9ed4274584801523e25207a038513728489a39ee..b8382624a0aaa7301afccbd3af091f3c9378d014 100644 --- a/hysop/backend/host/python/operator/flowrate_correction.py +++ b/hysop/backend/host/python/operator/flowrate_correction.py @@ -71,8 +71,8 @@ class PythonFlowRateCorrection(HostOperator): # Compute volume and surface integration coefficients spaceStep = vmesh.space_step lengths = vtopo.domain.length - self._inv_ds = 1. / np.prod(lengths[:-1]) - self._inv_dvol = 1. / (lengths[0]*lengths[1] * + self._inv_ds = 1.0 / np.prod(lengths[:-1]) + self._inv_dvol = 1.0 / (lengths[0]*lengths[1] * (self.absorption_start-vtopo.domain.origin[-1])) self.coeff_mean = np.prod(spaceStep) / np.prod(lengths) diff --git a/hysop/backend/host/python/operator/solenoidal_projection.py b/hysop/backend/host/python/operator/solenoidal_projection.py index a73a02da477b7f7f5ca004e553a09a49167e6ea6..cd7ced9795b19c58ec81d6415f5f249aa4d46340 100644 --- a/hysop/backend/host/python/operator/solenoidal_projection.py +++ b/hysop/backend/host/python/operator/solenoidal_projection.py @@ -1,6 +1,6 @@ import functools -import numba as nb +import numba as nb from hysop import __DEFAULT_NUMBA_TARGET__ from hysop.tools.types import check_instance, first_not_None @@ -16,7 +16,7 @@ class PythonSolenoidalProjection(SolenoidalProjectionOperatorBase, OpenClMappabl Solves solenoidal projection (project a 3d field F such that div(F)=0), using spectral methods. """ - + @classmethod def build_projection_filter(cls, FIN, FOUT, K, KK, target=__DEFAULT_NUMBA_TARGET__): assert len(FIN)==len(FOUT)==3 @@ -31,13 +31,13 @@ class PythonSolenoidalProjection(SolenoidalProjectionOperatorBase, OpenClMappabl @nb.guvectorize([signature], layout, target=target, nopython=True, cache=True) - def filter_projection_3d(Fin0, Fin1, Fin2, - K00, K01, K02, - K10, K11, K12, - K20, K21, K22, - KK00, KK01, KK02, - KK10, KK11, KK12, - KK20, KK21, KK22, + def filter_projection_3d(Fin0, Fin1, Fin2, + K00, K01, K02, + K10, K11, K12, + K20, K21, K22, + KK00, KK01, KK02, + KK10, KK11, KK12, + KK20, KK21, KK22, Fout0, Fout1, Fout2): for i in prange(0, Fin0.shape[0]): for j in prange(0, Fin0.shape[1]): @@ -95,7 +95,7 @@ class PythonSolenoidalProjection(SolenoidalProjectionOperatorBase, OpenClMappabl self.post_filter_div = make_div_filter(*args) else: self.post_filter_div = None - + @debug def setup(self, work): super(PythonSolenoidalProjection, self).setup(work=work) @@ -115,7 +115,7 @@ class PythonSolenoidalProjection(SolenoidalProjectionOperatorBase, OpenClMappabl if self.compute_divFin: self.pre_filter_div() self.backward_divFin_transform() - + self.filter_projection() if self.compute_divFout: diff --git a/hysop/backend/host/python/operator/vorticity_absorption.py b/hysop/backend/host/python/operator/vorticity_absorption.py index ae33f507a3319dc947272cbe45776fd88cb5f15e..d5f317291c78669a1f3515eec6f0d222672dc04a 100644 --- a/hysop/backend/host/python/operator/vorticity_absorption.py +++ b/hysop/backend/host/python/operator/vorticity_absorption.py @@ -102,7 +102,7 @@ class PythonVorticityAbsorption(HostOperator): domain = self.dvelocity.domain lengths = domain.length - self._inv_ds = 1. / np.prod(lengths[:-1]) + self._inv_ds = 1.0 / np.prod(lengths[:-1]) # Compute slice and x_coords for the absorption region, # for given start_coord to domain end. diff --git a/hysop/core/arrays/tests/test_array.py b/hysop/core/arrays/tests/test_array.py index a67ebd745b32ece5bcb3a5addbca4b21138e0423..c978c5b36b25d3fa21f8c885a6861ecb52449534 100644 --- a/hysop/core/arrays/tests/test_array.py +++ b/hysop/core/arrays/tests/test_array.py @@ -116,7 +116,7 @@ class TestArray(object): assert A[0][1][0] == 8 assert A[1][0][0] == 8*4 assert A.strides == (8*4, 8, 1) - assert A[1][1][1] == np.sum(np.asarray(A.strides) / A.dtype.itemsize) + assert A[1][1][1] == np.sum(np.asarray(A.strides) // A.dtype.itemsize) assert A[i1][j1][k1] == np.sum(np.asarray(A.strides) * (i1, j1, k1)) assert A[i0][j0][k0] == np.sum(np.asarray(A.strides) * (i0, j0, k0)) assert (A[i1][j1][k1]-A[i0][j0][k0]) == np.dot(A.strides, (i1-i0, j1-j0, k1-k0)) @@ -128,7 +128,7 @@ class TestArray(object): assert B[0][1][0] == 2 assert B[1][0][0] == 1 assert B.strides == (1, 2, 2*4) - assert B[1][1][1] == np.sum(np.asarray(B.strides) / B.dtype.itemsize) + assert B[1][1][1] == np.sum(np.asarray(B.strides) // B.dtype.itemsize) assert B[i1][j1][k1] == np.sum(np.asarray(B.strides) * (i1, j1, k1)) assert B[i0][j0][k0] == np.sum(np.asarray(B.strides) * (i0, j0, k0)) assert (B[i1][j1][k1]-B[i0][j0][k0]) == np.dot(B.strides, (i1-i0, j1-j0, k1-k0)) @@ -317,7 +317,6 @@ class TestArray(object): assert backend.allclose(a-b, -(b-a)) assert backend.allclose(a+b-c-d, -c-d+b+a) assert backend.allclose(a*b*c*d, d*c*a*b) - #assert backend.allclose( (a/b)*(c/d), (a*c)/(b*d) ) a = a % b a = a//b diff --git a/hysop/core/checkpoints.py b/hysop/core/checkpoints.py index 592ee6dec62e77e589890bdaba48a86c639f7bef..efa9697bd032a04df02675a8a5cf8bfff16610f7 100644 --- a/hysop/core/checkpoints.py +++ b/hysop/core/checkpoints.py @@ -11,7 +11,7 @@ from hysop.parameters import ScalarParameter, TensorParameter, BufferParameter from hysop.fields.cartesian_discrete_field import CartesianDiscreteScalarField class CheckpointHandler(object): - def __init__(self, load_checkpoint_path, save_checkpoint_path, + def __init__(self, load_checkpoint_path, save_checkpoint_path, compression_method, compression_level, io_params, relax_constraints): check_instance(load_checkpoint_path, str, allow_none=True) @@ -41,7 +41,7 @@ class CheckpointHandler(object): self._checkpoint_template = None self._checkpoint_compressor = None - + @property def load_checkpoint_path(self): return self._load_checkpoint_path @@ -60,17 +60,17 @@ class CheckpointHandler(object): @property def relax_constraints(self): return self._relax_constraints - + def get_mpio_parameters(self, mpi_params): io_params = self.io_params comm = mpi_params.comm - io_leader = io_params.io_leader + io_leader = io_params.io_leader is_io_leader = (io_leader == mpi_params.rank) return (io_params, mpi_params, comm, io_leader, is_io_leader) def is_io_leader(self, mpi_params): return (self.io_params.io_leader == mpi_params.rank) - + def finalize(self, mpi_params): if ((self._checkpoint_template is not None) and os.path.exists(self._checkpoint_template) @@ -81,17 +81,17 @@ class CheckpointHandler(object): pass self._checkpoint_template = None self._checkpoint_compressor = None - + def load_checkpoint(self, problem, simulation): - from hysop.problem import Problem + from hysop.problem import Problem from hysop.simulation import Simulation check_instance(problem, Problem) check_instance(simulation, Simulation) - + load_checkpoint_path = self.load_checkpoint_path if (load_checkpoint_path is None): return - + vprint('\n>Loading {}problem checkpoint from \'{}\'...'.format( 'relaxed' if self.relax_constraints else '', load_checkpoint_path)) if not os.path.exists(load_checkpoint_path): @@ -100,19 +100,19 @@ class CheckpointHandler(object): if (self.io_params is None): msg='Load checkpoint has been set to \'{}\' but checkpoint_io_params has not been specified.' raise RuntimeError(msg.format(load_checkpoint_path)) - + (io_params, mpi_params, comm, io_leader, is_io_leader) = self.get_mpio_parameters(problem.mpi_params) start = Wtime() - + # extract checkpoint to directory if required if os.path.isfile(load_checkpoint_path): if load_checkpoint_path.endswith('.tar'): if is_io_leader: - load_checkpoint_dir = os.path.join(os.path.dirname(load_checkpoint_path), + load_checkpoint_dir = os.path.join(os.path.dirname(load_checkpoint_path), os.path.basename(load_checkpoint_path).replace('.tar', '')) while os.path.exists(load_checkpoint_dir): # ok, use another directory name to avoid dataloss... - load_checkpoint_dir = os.path.join(os.path.dirname(load_checkpoint_path), + load_checkpoint_dir = os.path.join(os.path.dirname(load_checkpoint_path), '{}'.format(uuid.uuid4().hex)) tf = tarfile.open(load_checkpoint_path, mode='r') tf.extractall(path=load_checkpoint_dir) @@ -125,16 +125,16 @@ class CheckpointHandler(object): raise NotImplementedError(msg.format(load_checkpoint_path)) elif os.path.isdir(load_checkpoint_path): load_checkpoint_dir = load_checkpoint_path - should_remove_dir = False + should_remove_dir = False else: raise RuntimeError - + # import checkpoint data self._import_checkpoint(problem, simulation, load_checkpoint_dir) if (is_io_leader and should_remove_dir): shutil.rmtree(load_checkpoint_dir) - + ellapsed = Wtime() - start msg=' > Successfully imported checkpoint in {}.' vprint(msg.format(time2str(ellapsed))) @@ -146,11 +146,11 @@ class CheckpointHandler(object): if (io_params is None): return False return io_params.should_dump(simulation) - + # Checkpoint is first exported as a directory containing a hierarchy of arrays (field and parameters data + metadata) # This folder is than tarred (without any form of compression) so that a checkpoint consists in a single movable file. - # Data is already compressed during data export by the zarr module, using the blosc compressor (snappy, clevel=9). + # Data is already compressed during data export by the zarr module, using the blosc compressor (snappy, clevel=9). def save_checkpoint(self, problem, simulation): save_checkpoint_path = self.save_checkpoint_path if (self.save_checkpoint_path is None): @@ -159,16 +159,16 @@ class CheckpointHandler(object): if (self.io_params is None): msg='Load checkpoint has been set to \'{}\' but checkpoint io_params has not been specified.' raise RuntimeError(msg.format(load_checkpoint_path)) - + vprint('>Exporting problem checkpoint to \'{}\':'.format(save_checkpoint_path)) if not save_checkpoint_path.endswith('.tar'): msg='Can only export checkpoint with tar extension, got {}.' raise NotImplementedError(msg.format(save_checkpoint_path)) save_checkpoint_tar = save_checkpoint_path - + (io_params, mpi_params, comm, io_leader, is_io_leader) = self.get_mpio_parameters(problem.mpi_params) start = Wtime() - + # create a backup of last checkpoint just in case things go wrong if is_io_leader and os.path.exists(save_checkpoint_tar): backup_checkpoint_tar = save_checkpoint_tar + '.bak' @@ -180,16 +180,16 @@ class CheckpointHandler(object): # determine checkpoint dump directory if is_io_leader: - save_checkpoint_dir = os.path.join(os.path.dirname(save_checkpoint_tar), + save_checkpoint_dir = os.path.join(os.path.dirname(save_checkpoint_tar), os.path.basename(save_checkpoint_tar).replace('.tar', '')) while os.path.exists(save_checkpoint_dir): # ok, use another directory name to avoid dataloss... - save_checkpoint_dir = os.path.join(os.path.dirname(save_checkpoint_tar), + save_checkpoint_dir = os.path.join(os.path.dirname(save_checkpoint_tar), '{}'.format(uuid.uuid4().hex)) else: save_checkpoint_dir = None save_checkpoint_dir = mpi_params.comm.bcast(save_checkpoint_dir, root=io_leader) - + # try to create the checkpoint directory, this is a collective MPI operation try: success, reason, nbytes = self._export_checkpoint(problem, simulation, save_checkpoint_dir) @@ -198,7 +198,7 @@ class CheckpointHandler(object): success = False reason = str(e) success = comm.allreduce(int(success)) == comm.size - + # Compress checkpoint directory to tar (easier to copy/move between clusters) # Note that there is no effective compression here, zarr already compressed field/param data if success and is_io_leader and os.path.isdir(save_checkpoint_dir): @@ -213,7 +213,7 @@ class CheckpointHandler(object): shutil.rmtree(save_checkpoint_dir) else: raise RuntimeError('Could not tar checkpoint datadir.') - + ellapsed = Wtime() - start effective_nbytes = os.path.getsize(save_checkpoint_tar) compression_ratio = max(1.0, float(nbytes)/effective_nbytes) @@ -224,15 +224,15 @@ class CheckpointHandler(object): success = False reason = str(e) success = comm.allreduce(int(success)) == comm.size - + if success: if (backup_checkpoint_tar is not None) and os.path.isfile(backup_checkpoint_tar) and is_io_leader: os.remove(backup_checkpoint_tar) return - + from hysop.tools.warning import HysopDumpWarning msg='Failed to export checkpoint because: {}.'.format(reason) - warnings.warn(msg, HysopDumpWarning) + warnings.warn(msg, HysopDumpWarning) # Something went wrong (I/O error or other) so we rollback to previous checkpoint (if there is one) vprint(' | An error occured during checkpoint creation, rolling back to previous checkpoint...') @@ -243,30 +243,30 @@ class CheckpointHandler(object): os.remove(save_checkpoint_tar) if (backup_checkpoint_tar is not None) and os.path.exists(backup_checkpoint_tar): os.rename(backup_checkpoint_tar, save_checkpoint_tar) - + def create_checkpoint_template(self, problem, simulation): # Create groups of arrays on disk (only hierarchy and array metadata is stored in the template) - # /!\ ZipStores are not safe from multiple processes so we use a DirectoryStore + # /!\ ZipStores are not safe from multiple processes so we use a DirectoryStore # that can then be tarred manually by io_leader. - + save_checkpoint_path = self.save_checkpoint_path if (save_checkpoint_path is None): return - + if not save_checkpoint_path.endswith('.tar'): msg='Can only export checkpoint with tar extension, got {}.' raise NotImplementedError(msg.format(save_checkpoint_path)) - + (io_params, mpi_params, comm, io_leader, is_io_leader) = self.get_mpio_parameters(problem.mpi_params) - + # determine an empty directory for the template if is_io_leader: - checkpoint_template = os.path.join(os.path.dirname(save_checkpoint_path), + checkpoint_template = os.path.join(os.path.dirname(save_checkpoint_path), os.path.basename(save_checkpoint_path).replace('.tar', '.template')) while os.path.exists(checkpoint_template): # ok, use another directory name to avoid dataloss... - checkpoint_template = os.path.join(os.path.dirname(save_checkpoint_path), + checkpoint_template = os.path.join(os.path.dirname(save_checkpoint_path), '{}'.format(uuid.uuid4().hex)) else: checkpoint_template = None @@ -277,13 +277,13 @@ class CheckpointHandler(object): import zarr from numcodecs import blosc, Blosc blosc.use_threads = (mpi_params.size == 1) # disable threads for multiple processes (can deadlock) - + # array data compressor self._compression_method = first_not_None(self._compression_method, 'zstd') self._compression_level = first_not_None(self._compression_level, 6) compressor = Blosc(cname=self._compression_method, clevel=self._compression_level, shuffle=Blosc.SHUFFLE) self._checkpoint_compressor = compressor - + # io_leader creates a directory layout on (hopefully) shared filesystem if is_io_leader: if os.path.exists(checkpoint_template): @@ -304,7 +304,7 @@ class CheckpointHandler(object): # count number of total data bytes without compression nbytes = 0 fmt_key = self._format_zarr_key - + # operators for op in problem.nodes: if not op.checkpoint_required(): @@ -329,7 +329,7 @@ class CheckpointHandler(object): assert isinstance(param._value, np.ndarray), type(param._value) value = param._value array = params_group.create_dataset(name=fmt_key(param.name), - overwrite=False, data=None, synchronizer=None, + overwrite=False, data=None, synchronizer=None, compressor=compressor, shape=value.shape, chunks=None, dtype=value.dtype, fill_value=default_invalid_value(value.dtype)) array.attrs['kind'] = param.__class__.__name__ @@ -337,11 +337,11 @@ class CheckpointHandler(object): else: msg = 'Cannot export parameter of type {}.'.format(param.__class__.__name__) raise NotImplementedError(msg) - + # Generate discrete field arrays # Here we assume that each process has a non-empty chunk of data for field in sorted(problem.fields, key=operator.attrgetter('name')): - + # we do not care about fields discretized only on temporary fields if all(df.is_tmp for df in field.discrete_fields.values()): continue @@ -353,7 +353,7 @@ class CheckpointHandler(object): dim = field.dim domain = field.domain._domain - + if isinstance(domain, Box): if (field_group is not None): field_group.attrs['domain'] = 'Box' @@ -362,17 +362,17 @@ class CheckpointHandler(object): field_group.attrs['end'] = to_tuple(domain.end) field_group.attrs['length'] = to_tuple(domain.length) else: - # for now we just handle Boxed domains + # for now we just handle Boxed domains raise NotImplementedError for (k, topo) in enumerate(sorted(field.discrete_fields, key=operator.attrgetter('full_tag'))): dfield = field.discrete_fields[topo] mesh = topo.mesh._mesh - + # we do not care about temporary fields if dfield.is_tmp: continue - + if not isinstance(dfield, CartesianDiscreteScalarField): # for now we just handle CartesianDiscreteScalarFields. raise NotImplementedError @@ -380,9 +380,9 @@ class CheckpointHandler(object): global_resolution = topo.global_resolution # logical grid size grid_resolution = topo.grid_resolution # effective grid size ghosts = topo.ghosts - + # get local resolutions exluding ghosts - compute_resolutions = comm.gather(to_tuple(mesh.compute_resolution), root=io_leader) + compute_resolutions = comm.gather(to_tuple(mesh.compute_resolution), root=io_leader) # is the current process handling a right boundary data block on a distributed axe ? is_at_right_boundary = (mesh.is_at_right_boundary*(mesh.proc_shape>1)).any() @@ -390,14 +390,14 @@ class CheckpointHandler(object): if not is_io_leader: continue - - # io_leader can now determine wether the cartesian discretization is uniformly distributed + + # io_leader can now determine wether the cartesian discretization is uniformly distributed # between processes or not - inner_compute_resolutions = tuple(compute_resolutions[i] for i in range(len(compute_resolutions)) + inner_compute_resolutions = tuple(compute_resolutions[i] for i in range(len(compute_resolutions)) if not is_at_right_boundary[i]) - grid_is_uniformly_distributed = all(res == inner_compute_resolutions[0] + grid_is_uniformly_distributed = all(res == inner_compute_resolutions[0] for res in inner_compute_resolutions) - + if grid_is_uniformly_distributed: # We divide the array in 'compute_resolution' chunks, no sychronization is required. # Here there is no need to use the process locker to write this array data. @@ -412,29 +412,29 @@ class CheckpointHandler(object): if dim == 1: chunks = 1024*1024 # at least 1MB / chunk elif dim == 2: - chunks = (1024,1024) # at least 1MB / chunk + chunks = (1024,1024) # at least 1MB / chunk elif dim == 3: chunks = (64,128,128) # at least 1MB / chunk else: raise NotImplementedError(dim) - + if should_sync: raise NotImplementedError - + # Create array (no memory is allocated here, even on disk because data blocks are empty) dtype = dfield.dtype shape = grid_resolution - + # We scale the keys up to 100 topologies, which seams to be a pretty decent upper limit # on a per field basis. - array = field_group.create_dataset(name='topo_{:02d}'.format(k), - overwrite=False, data=None, synchronizer=None, - compressor=compressor, shape=shape, chunks=chunks, + array = field_group.create_dataset(name='topo_{:02d}'.format(k), + overwrite=False, data=None, synchronizer=None, + compressor=compressor, shape=shape, chunks=chunks, dtype=dtype, fill_value=default_invalid_value(dtype)) array.attrs['should_sync'] = should_sync # We cannot rely on discrete mesh name because of topology names - # so we save some field metadata to be able to differentiate between + # so we save some field metadata to be able to differentiate between # discrete fields with the exact same grid resolution. # proc_shape and name are used in last resort to differentiate discrete fields. array.attrs['lboundaries'] = to_tuple(map(str, mesh.global_lboundaries)) @@ -442,9 +442,9 @@ class CheckpointHandler(object): array.attrs['ghosts'] = to_tuple(mesh.ghosts) array.attrs['proc_shape'] = to_tuple(mesh.proc_shape) array.attrs['name'] = dfield.name - + nbytes += np.prod(shape, dtype=np.int64) * dtype.itemsize - + if (root is not None): root.attrs['nbytes'] = nbytes msg='>Maximum checkpoint size will be {}, without compression and metadata.' @@ -457,7 +457,7 @@ class CheckpointHandler(object): root.close() except AttributeError: pass - + def _export_checkpoint(self, problem, simulation, save_checkpoint_dir): # Given a template, fill field and parameters data from all processes. @@ -475,13 +475,13 @@ class CheckpointHandler(object): shutil.rmtree(save_checkpoint_dir) shutil.copytree(checkpoint_template, save_checkpoint_dir) comm.Barrier() - + if not os.path.isdir(save_checkpoint_dir): msg='Could not find checkpoint directory \'{}\'. Are you using a network file system ?'.format(save_checkpoint_dir) raise RuntimeError(msg) #Every process now loads the same dataset template - import zarr + import zarr try: store = zarr.DirectoryStore(save_checkpoint_dir) root = zarr.open_group(store=store, mode='r+', synchronizer=None, path='data') @@ -494,14 +494,14 @@ class CheckpointHandler(object): msg='A fatal error occured during checkpoint export, checkpoint template may be illformed.' vprint(msg) vprint() - raise + raise fmt_key = self._format_zarr_key # Export simulation data if is_io_leader: simulation.save_checkpoint(simu_group, mpi_params, io_params, checkpoint_compressor) - + # Export operator data for op in problem.nodes: if not op.checkpoint_required(): @@ -524,14 +524,14 @@ class CheckpointHandler(object): else: msg = 'Cannot dump parameter of type {}.'.format(param.__class__.__name__) raise NotImplementedError(msg) - + # Unlike parameter all processes participate for fields for field in sorted(problem.fields, key=operator.attrgetter('name')): # we do not care about fields discretized only on temporary fields if all(df.is_tmp for df in field.discrete_fields.values()): continue - + msg = ' | dumping field {}...'.format(field.pretty_name) vprint(msg) @@ -539,7 +539,7 @@ class CheckpointHandler(object): for (k, topo) in enumerate(sorted(field.discrete_fields, key=operator.attrgetter('full_tag'))): dfield = field.discrete_fields[topo] mesh = topo.mesh._mesh - + # we do not care about temporary fields if dfield.is_tmp: continue @@ -552,18 +552,18 @@ class CheckpointHandler(object): assert (array.shape == mesh.grid_resolution).all(), (array.shape, mesh.grid_resolution) assert array.dtype == dfield.dtype, (array.dtype, dfield.dtype) - if should_sync: + if should_sync: # Should not be required untill we allow non-uniform discretizations global_start = mesh.global_start global_stop = mesh.global_stop raise NotImplementedError('Synchronized multiprocess write has not been implemented yet.') else: - assert ((mesh.compute_resolution == array.chunks).all() + assert ((mesh.compute_resolution == array.chunks).all() or (mesh.is_at_right_boundary*(mesh.proc_shape>1)).any()) local_data = dfield.compute_data[0].get() global_slices = mesh.global_compute_slices array[global_slices] = local_data # ok, every process writes to an independent data blocks - + # Some zarr store formats require a final close to flush data try: root.close() @@ -571,20 +571,20 @@ class CheckpointHandler(object): pass return True, None, nbytes - + # On data import, there is no need to synchronize read-only arrays # so we are good with multiple processes reading overlapping data blocks def _import_checkpoint(self, problem, simulation, load_checkpoint_dir): - + (io_params, mpi_params, comm, io_leader, is_io_leader) = self.get_mpio_parameters(problem.mpi_params) mpi_params.comm.Barrier() if not os.path.isdir(load_checkpoint_dir): msg='Could not find checkpoint directory \'{}\'. Are you using a network file system ?'.format(load_checkpoint_dir) raise RuntimeError(msg) - - import zarr + + import zarr store = zarr.DirectoryStore(load_checkpoint_dir) try: root = zarr.open_group(store=store, mode='r', synchronizer=None, path='data') @@ -595,8 +595,8 @@ class CheckpointHandler(object): msg='A fatal error occured during checkpoint import, checkpoint data may be illformed.' vprint(msg) vprint() - raise - + raise + # Define helper functions relax_constraints = self.relax_constraints raise_error = self._raise_error @@ -606,12 +606,12 @@ class CheckpointHandler(object): raise_warning = self._raise_error load_array_data = functools.partial(self._load_array_data, on_mismatch=raise_warning) fmt_key = self._format_zarr_key - + # Import simulation data after parameters are up to date msg = ' | importing simulation...' vprint(msg) simulation.load_checkpoint(simu_group, mpi_params, io_params, relax_constraints) - + # Import operator data for op in problem.nodes: if not op.checkpoint_required(): @@ -623,7 +623,7 @@ class CheckpointHandler(object): continue operator_group = operators_group[key] op.load_checkpoint(operator_group, mpi_params, io_params, relax_constraints) - + # Import parameters, hopefully parameter names match the ones in the checkpoint msg = ' | importing parameters...' vprint(msg) @@ -634,7 +634,7 @@ class CheckpointHandler(object): msg='Checkpoint directory \'{}\' does not contain any data regarding to parameter {}' msg=msg.format(load_checkpoint_dir, param.name) raise_error(msg) - + array = params_group[key] if array.attrs['kind'] != param.__class__.__name__: @@ -644,7 +644,7 @@ class CheckpointHandler(object): if isinstance(param, (ScalarParameter, TensorParameter, BufferParameter)): value = param._value - + if (array.shape != value.shape): msg='Parameter shape does not match with checkpointed parameter {}, loaded shape {} but expected {}.' msg=msg.format(param.name, array.shape, value.shape) @@ -654,15 +654,15 @@ class CheckpointHandler(object): msg='Parameter datatype does not match with checkpointed parameter {}, loaded dtype {} but expected {}.' msg=msg.format(param.name, array.dtype, value.dtype) raise_warning(msg) - + value[...] = array[...] else: msg = 'Cannot import parameter of type {}.'.format(param.__class__.__name__) raise NotImplementedError(msg) - + # Import discrete fields, this is a bit more tricky because topologies or simply topology - # names can change. Moreover there is currently no waranty that the same operator graph is - # generated for the exact same problem configuration each time. We just emit user warnings + # names can change. Moreover there is currently no waranty that the same operator graph is + # generated for the exact same problem configuration each time. We just emit user warnings # if we find a way to match topologies that do not match exactly checkpointed ones. for field in sorted(problem.fields, key=operator.attrgetter('name')): domain = field.domain._domain @@ -673,7 +673,7 @@ class CheckpointHandler(object): msg = ' | importing field {}...'.format(field.pretty_name) vprint(msg) - + field_key = fmt_key(field.name) if (field_key not in fields_group): msg='Checkpoint directory \'{}\' does not contain any data regarding to field {}' @@ -685,7 +685,7 @@ class CheckpointHandler(object): # check that domain matches if field_group.attrs['domain'] != domain.__class__.__name__: msg='Domain kind does not match with checkpointed field {}, loaded kind {} but expected {}.' - msg=msg.format(field.name, field_group.attrs['domain'], domain.__class__.__name__) + msg=msg.format(field.name, field_group.attrs['domain'], domain.__class__.__name__) raise_error(msg) if field_group.attrs['dim'] != domain.dim: msg='Domain dim does not match with checkpointed field {}, loaded dim {} but expected {}.' @@ -711,7 +711,7 @@ class CheckpointHandler(object): # we do not care about temporary fields if dfield.is_tmp: continue - + # for now we just handle CartesianDiscreteScalarFields. if not isinstance(dfield, CartesianDiscreteScalarField): raise NotImplementedError @@ -720,7 +720,7 @@ class CheckpointHandler(object): candidates = tuple(filter(lambda d: np.equal(d.shape, mesh.grid_resolution).all(), field_group.values())) if len(candidates)==0: msg='Could not find any topology with shape {} for field {}, available discretizations are: {}.' - msg=msg.format(to_tuple(mesh.grid_resolution), field.name, + msg=msg.format(to_tuple(mesh.grid_resolution), field.name, ', '.join(set(str(d.shape) for d in field_group.values()))) raise_error(msg) elif len(candidates)==1: @@ -737,7 +737,7 @@ class CheckpointHandler(object): elif len(candidates)==1: load_array_data(candidates[0], dfield) continue - + # From now on multiple topologies have the same grid resolution and boundary conditions # We try to match exact ghost count, user did likely not change the order of the methods. old_candidates = candidates @@ -748,7 +748,7 @@ class CheckpointHandler(object): elif len(candidates)==1: load_array_data(candidates[0], dfield) continue - + # Now we try to differentiate by using zero ghost info (ghosts may change with method order, but zero-ghost is very specific) # Topology containing zero ghost layer usually target Fortran topologies for FFT operators or method that do not require any ghosts. old_candidates = candidates @@ -759,7 +759,7 @@ class CheckpointHandler(object): elif len(candidates)==1: load_array_data(candidates[0], dfield) continue - + # Now we try to match exact topology shape (the MPICart grid of processes) # We try this late because use may run the simulation again with a different number of processes. old_candidates = candidates @@ -770,7 +770,7 @@ class CheckpointHandler(object): elif len(candidates)==1: load_array_data(candidates[0], dfield) continue - + # Now we try to differentiate by using topo splitting info (axes on which data is distributed) # This again is very specific and can differentiate topologies used for spectral transforms. old_candidates = candidates @@ -781,7 +781,7 @@ class CheckpointHandler(object): elif len(candidates)==1: load_array_data(candidates[0], dfield) continue - + # Ok now, our last hope is to match the discrete field name old_candidates = candidates candidates = tuple(filter(lambda d: d.attrs['name'] == dfield.name, candidates)) @@ -791,19 +791,19 @@ class CheckpointHandler(object): elif len(candidates)==1: load_array_data(candidates[0], dfield) continue - + assert len(candidates) > 1, 'Something went wrong.' msg='Could not discriminate checkpointed topologies for field {}, got {} candidates remaining.' msg=msg.format(field.name, len(candidates)) raise_error(msg) - - + + @staticmethod def _load_array_data(array, dfield, on_mismatch): mesh = dfield.mesh._mesh assert np.equal(array.shape, mesh.grid_resolution).all() - + # compare attributes but ignore name because this can be annoying attr_names = ('left boundaries', 'right boundaries', 'ghost layers', 'process shape', 'datatype') array_attributes = (array.attrs['lboundaries'], array.attrs['rboundaries'], array.attrs['ghosts'], @@ -822,7 +822,7 @@ class CheckpointHandler(object): data = np.asarray(array[global_slices], dtype=dfield.dtype) dfield.compute_data[0][...] = data dfield.exchange_ghosts() - + @staticmethod def _raise_error(msg): vprint(' | error: {}\n'.format(msg)) @@ -834,7 +834,7 @@ class CheckpointHandler(object): def _raise_warning(msg): msg = ' | warning: {}'.format(msg) vprint(msg) - + @staticmethod def _format_zarr_key(k): # note keys that contains the special characters '/' and '\' do not work well with zarr diff --git a/hysop/core/memory/mempool.py b/hysop/core/memory/mempool.py index 2401a77c43b233284f39dfd6dc17c2f2e7eae039..db678c1b21847767ff3addeee237b6ec096a4c71 100644 --- a/hysop/core/memory/mempool.py +++ b/hysop/core/memory/mempool.py @@ -463,7 +463,7 @@ class MemoryPool(object, metaclass=ABCMeta): if nblocks == 0: continue has_block = True - mean_bytes = sum([b.size for b in blocks]) / nblocks + mean_bytes = sum([b.size for b in blocks]) / float(nblocks) ss += '\n *bin {}: nblocks={} mean_block_size={}'.format(bin_nr, nblocks, bytes2str(mean_bytes)) if not has_block: diff --git a/hysop/core/mpi/bridge.py b/hysop/core/mpi/bridge.py index ad49e3bb85411ff622bb795ff37c0f30437d166b..3bb2d4a89e2dcd95294e9fd79364fe78a67ae436 100644 --- a/hysop/core/mpi/bridge.py +++ b/hysop/core/mpi/bridge.py @@ -29,7 +29,7 @@ class Bridge(object): ---------- source, target : :class:`~hysop.topology.topology.CartesianTopology` topologies that own the source mesh and targeted mesh - dtype: + dtype: numpy dtype to be send and received """ # -- All dictionnaries below use rank number (in parent comm) @@ -80,7 +80,7 @@ class Bridge(object): """ # Get global indices of the mesh on source for all mpi processes. indices_source = TopoTools.gather_global_indices(self._source) - + # Get global indices of the mesh on target for all mpi processes. indices_target = TopoTools.gather_global_indices(self._target) # From now on, we have indices_source[rk] = global indices (slice) diff --git a/hysop/core/mpi/topo_tools.py b/hysop/core/mpi/topo_tools.py index b5bfe8d3f54e30f3ae724fcfc7f5ce44def0136e..a109623b165ce9af04aeb70ae9ae29690d51751b 100644 --- a/hysop/core/mpi/topo_tools.py +++ b/hysop/core/mpi/topo_tools.py @@ -338,7 +338,7 @@ class TopoTools(object): dimension = topo.domain.dim for i in range(dimension): ind = [j for j in range(len(reduced_res)) if j != i] - n_group[:, i] = reduced_res[ind] / group_size[:, i] + n_group[:, i] = reduced_res[ind] // group_size[:, i] tag_size = npw.asintegerarray(np.ceil(np.log10(n_group))) tag_rank = max(2, math.ceil(math.log10(3 * max(topo.shape)))) diff --git a/hysop/numerics/interpolation/polynomial.py b/hysop/numerics/interpolation/polynomial.py index 603ee580099ec6c98be0b4be23f8b4d69a806cea..c1e2e62778a72162faea2a9deb92ba14dc81462b 100644 --- a/hysop/numerics/interpolation/polynomial.py +++ b/hysop/numerics/interpolation/polynomial.py @@ -188,13 +188,13 @@ class PolynomialInterpolator(object): check_instance(fd, tuple, values=int, size=dim) p = tuple(degi+1 for degi in deg) - k = tuple((degi-1)/2 for degi in deg) + k = tuple((degi-1)//2 for degi in deg) ghosts = () n = () for (fdi,ki) in zip(fd,k): if (ki>0): - gi = (fdi/2) - 1 + (ki+1)/2 + gi = (fdi//2) - 1 + (ki+1)//2 else: gi = 0 ni = 2*(gi+1) @@ -648,7 +648,7 @@ class PolynomialSubgridRestrictor(object): g = tuple(ni*gri+1 for (ni,gri) in zip(n,gr)) G = np.prod(g, dtype=np.int64) assert all(gi%2==1 for gi in g) - origin = tuple(gi/2 for gi in g) + origin = tuple(gi//2 for gi in g) gvals, gvars = tensor_symbol('g',g,origin) I = 0 for idx in np.ndindex(*gr): diff --git a/hysop/numerics/stencil/stencil.py b/hysop/numerics/stencil/stencil.py index 05c1b91b3a54577f4c33f12346ab6b0c39031e21..cdf3d4efb3d1d2414685722e2aae4b78b775a6e4 100644 --- a/hysop/numerics/stencil/stencil.py +++ b/hysop/numerics/stencil/stencil.py @@ -102,7 +102,7 @@ class Stencil(object): assert (new_shape.ndim==shape.ndim) assert (new_shape>=shape).all() assert ((new_shape-shape)%2 == 0).all() - zeros = (new_shape-shape)/2 + zeros = (new_shape-shape)//2 slc = tuple(slice(z,z+s,1) for (z,s) in zip(zeros, shape)) new_origin = zeros + self.origin @@ -425,9 +425,9 @@ class CenteredStencil(Stencil): shape = np.asarray(coeffs.shape) if (shape%2==0).any(): raise ValueError('Shape compnonent even!') - if (origin!=(shape-1)/2).any(): + if (origin!=(shape-1)//2).any(): print(origin) - print((shape-1)/2) + print((shape-1)//2) raise ValueError('Origin is not centered!') super(CenteredStencil,self).__init__(coeffs, origin, order, dx, factor, error, **kwds) diff --git a/hysop/numerics/stencil/stencil_generator.py b/hysop/numerics/stencil/stencil_generator.py index 856b2a53922a09223f00621f86572a72a808596a..f621e998c05c1a62380c7d8b24a8fbe384d3f526 100644 --- a/hysop/numerics/stencil/stencil_generator.py +++ b/hysop/numerics/stencil/stencil_generator.py @@ -537,7 +537,7 @@ class CenteredStencilGenerator(StencilGenerator): config.configure(**kargs) shape = config.shape() - origin = (shape-1)/2 + origin = (shape-1)//2 stencil = super(CenteredStencilGenerator,self)\ .generate_exact_stencil(origin=origin, **kargs) if stencil.is_centered(): @@ -550,7 +550,7 @@ class CenteredStencilGenerator(StencilGenerator): config.configure(**kargs) shape = config.shape() - origin = (shape-1)/2 + origin = (shape-1)//2 stencil = super(CenteredStencilGenerator,self)\ .generate_approximative_stencil(origin, **kargs) if stencil.is_centered(): diff --git a/hysop/numerics/tests/bench_fft.py b/hysop/numerics/tests/bench_fft.py index ce252728f6aabb4baacaee4732d2ad4dadb546c5..7b625c1bbef68f4e5a2f51df9154c28033ef6bab 100644 --- a/hysop/numerics/tests/bench_fft.py +++ b/hysop/numerics/tests/bench_fft.py @@ -110,7 +110,7 @@ class BenchFFT(object): evt = plan.execute(wait_for=[evt]) evts += (evt,) evt.wait() - res = OpenClKernelStatistics(events=evts).mean/1e6 # ms + res = OpenClKernelStatistics(events=evts).mean / 1.0e6 # ms else: plan.execute() with Timer() as t: diff --git a/hysop/operator/base/advection_dir.py b/hysop/operator/base/advection_dir.py index 10026cad40c8fe3155f6e3dcec578f7c71446195..7c6331e0da487daf2c96b3be9772d6f067b21586 100644 --- a/hysop/operator/base/advection_dir.py +++ b/hysop/operator/base/advection_dir.py @@ -259,7 +259,7 @@ class DirectionalAdvectionBase(object): assert remesh_kernel.n >=1 , 'Bad remeshing kernel.' if remesh_kernel.n >1: assert remesh_kernel.n % 2 == 0, 'Odd remeshing kernel moments.' - min_ghosts = int(npw.floor(scalar_cfl)+1+remesh_kernel.n/2) + min_ghosts = int(npw.floor(scalar_cfl)+1+remesh_kernel.n//2) return min_ghosts @debug diff --git a/hysop/operator/tests/test_poisson.py b/hysop/operator/tests/test_poisson.py index 5ed9ed90f116bd907daa3c0983b4a8ac80d688d2..3de240114a485a10fbf77ba39ff0dc97ff3591c5 100644 --- a/hysop/operator/tests/test_poisson.py +++ b/hysop/operator/tests/test_poisson.py @@ -129,7 +129,7 @@ class TestPoissonOperator(object): domain=domain, Psi=Psi, W=W, polynomial=polynomial, nb_components=nb_components) if (max_runs is not None) and (i == max_runs): - missing = ((4**(dim+1) - 1) / 3) - i + missing = ((4**(dim+1) - 1) // 3) - i print() print('>> MAX RUNS ACHIEVED FOR {}D DOMAINS -- SKIPING {} OTHER BOUNDARY CONDITIONS <<'.format( dim, missing)) @@ -137,7 +137,7 @@ class TestPoissonOperator(object): print() break else: - assert (i == (4**(dim+1)-1)/3), (i+1, (4**(dim+1)-1)/3) + assert (i == (4**(dim+1)-1)//3), (i+1, (4**(dim+1)-1)//3) print() print('>> TESTED ALL {}D BOUNDARY CONDITIONS <<'.format(dim)) print() diff --git a/hysop/operator/tests/test_poisson_curl.py b/hysop/operator/tests/test_poisson_curl.py index 887a29209f90b9d5193659d04e34ff7f2a1b47a8..a7433ea1a65dae6d7f073ab419db4cf94e9e6069 100644 --- a/hysop/operator/tests/test_poisson_curl.py +++ b/hysop/operator/tests/test_poisson_curl.py @@ -128,14 +128,14 @@ class TestPoissonCurlOperator(object): self._test_one(shape=shape, dim=dim, dtype=dtype, domain=domain, W=W, U=U, polynomial=polynomial) if (max_runs is not None) and (i==max_runs): - missing = ((4**(dim+1) - 1) / 3) - i + missing = ((4**(dim+1) - 1) // 3) - i print() print('>> MAX RUNS ACHIEVED FOR {}D DOMAINS -- SKIPING {} OTHER BOUNDARY CONDITIONS <<'.format(dim, missing)) print() print() break else: - assert (i==(4**(dim+1)-1)/3), (i+1, (4**(dim+1)-1)/3) + assert (i==(4**(dim+1)-1)//3), (i+1, (4**(dim+1)-1)//3) print() print('>> TESTED ALL {}D BOUNDARY CONDITIONS <<'.format(dim)) print() diff --git a/hysop/operator/tests/test_solenoidal_projection.py b/hysop/operator/tests/test_solenoidal_projection.py index 8101f6d388f5c1a4a26af55f6f2b8088ea682681..966a8cf56793d64e944804b38bc88669f1017847 100644 --- a/hysop/operator/tests/test_solenoidal_projection.py +++ b/hysop/operator/tests/test_solenoidal_projection.py @@ -137,14 +137,14 @@ class TestSolenoidalProjectionOperator(object): domain=domain, U=U, U0=U0, U1=U1, divU=divU, divU0=divU0, divU1=divU1) if (max_runs is not None) and (i==max_runs): - missing = ((4**(dim+1) - 1) / 3) - i + missing = ((4**(dim+1) - 1) // 3) - i print() print('>> MAX RUNS ACHIEVED FOR {}D DOMAINS -- SKIPING {} OTHER BOUNDARY CONDITIONS <<'.format(dim, missing)) print() print() break else: - assert (i==(4**(dim+1)-1)/3), (i+1, (4**(dim+1)-1)/3) + assert (i==(4**(dim+1)-1)//3), (i+1, (4**(dim+1)-1)//3) print() print('>> TESTED ALL {}D BOUNDARY CONDITIONS <<'.format(dim)) print() diff --git a/hysop/operator/tests/test_spectral_curl.py b/hysop/operator/tests/test_spectral_curl.py index 126282cfea635cfdc3553ff92e52fd789e283529..3d36452ef3cbb2120469339ee2d52c55fce20007 100644 --- a/hysop/operator/tests/test_spectral_curl.py +++ b/hysop/operator/tests/test_spectral_curl.py @@ -131,7 +131,7 @@ class TestSpectralCurl(object): self._test_one(shape=shape, dim=dim, dtype=dtype, domain=domain, Fin=Fin, Fout=Fout, polynomial=polynomial) if (max_runs is not None) and (i == max_runs): - missing = ((4**(dim+1) - 1) / 3) - i + missing = ((4**(dim+1) - 1) // 3) - i print() print('>> MAX RUNS ACHIEVED FOR {}D DOMAINS -- SKIPING {} OTHER BOUNDARY CONDITIONS <<'.format( dim, missing)) @@ -139,7 +139,7 @@ class TestSpectralCurl(object): print() break else: - assert (i == (4**(dim+1)-1)/3), (i+1, (4**(dim+1)-1)/3) + assert (i == (4**(dim+1)-1)//3), (i+1, (4**(dim+1)-1)//3) print() print('>> TESTED ALL {}D BOUNDARY CONDITIONS <<'.format(dim)) print() diff --git a/hysop/operator/tests/test_spectral_derivative.py b/hysop/operator/tests/test_spectral_derivative.py index 3d98b15e53077774272a057ec5b3cdfe86d1e77a..3c705422b8162bb0215ff12b8073e1091a70c01e 100644 --- a/hysop/operator/tests/test_spectral_derivative.py +++ b/hysop/operator/tests/test_spectral_derivative.py @@ -135,7 +135,7 @@ class TestSpectralDerivative(object): max_derivative=max_derivative) if (max_runs is not None) and (i == max_runs): - missing = ((4**(dim+1) - 1) / 3) - i + missing = ((4**(dim+1) - 1) // 3) - i print() print('>> MAX RUNS ACHIEVED FOR {}D DOMAINS -- SKIPING {} OTHER BOUNDARY CONDITIONS <<'.format( dim, missing)) @@ -143,7 +143,7 @@ class TestSpectralDerivative(object): print() break else: - assert (i == (4**(dim+1)-1)/3), (i+1, (4**(dim+1)-1)/3) + assert (i == (4**(dim+1)-1)//3), (i+1, (4**(dim+1)-1)//3) print() print('>> TESTED ALL {}D BOUNDARY CONDITIONS <<'.format(dim)) print() diff --git a/hysop/problem.py b/hysop/problem.py index e7031a3626988605984c3676b7d3bbdfb35263de..236f2d9cf72aa0a105cb2c69f7b4f9e66f0c5f4e 100644 --- a/hysop/problem.py +++ b/hysop/problem.py @@ -130,14 +130,14 @@ class Problem(ComputationalGraph): def solve(self, simu, dry_run=False, dbg=None, report_freq=10, plot_freq=10, checkpoint_handler=None, **kwds): - + if dry_run: vprint() vprint_banner('** Dry-run requested, skipping simulation. **') return - + simu.initialize() - + check_instance(checkpoint_handler, CheckpointHandler, allow_none=True) checkpoint_handler.create_checkpoint_template(self, simu) checkpoint_handler.load_checkpoint(self, simu) @@ -154,7 +154,7 @@ class Problem(ComputationalGraph): checkpoint_handler.save_checkpoint(self, simu) if report_freq and (simu.current_iteration % report_freq) == 0: self.profiler_report() - + size = self.mpi_params.size avg_time = self.mpi_params.comm.allreduce(tm.interval) / size msg = ' Simulation took {} ({}s)' @@ -163,7 +163,7 @@ class Problem(ComputationalGraph): msg += '\n for {} iterations ({}s per iteration) ' msg = msg.format(datetime.timedelta(seconds=round(avg_time)), avg_time, max(simu.current_iteration+1, 1), - avg_time/max(simu.current_iteration+1, 1)) + avg_time / max(simu.current_iteration+1, 1)) vprint_banner(msg, spacing=True, at_border=2) simu.finalize() diff --git a/hysop/tools/io_utils.py b/hysop/tools/io_utils.py index dfaf7080a8c1c96a7595985ae06048438509be38..148aabff257f02361858d6ea2d5be3b2325b66d6 100755 --- a/hysop/tools/io_utils.py +++ b/hysop/tools/io_utils.py @@ -599,7 +599,7 @@ class Writer(object): str_fmt = '%g\t'*(shape1 - 1) + '%g\n' # use a big format string str_fmt_N = str_fmt * N - for i in range(shape0/N): + for i in range(shape0//N): a1 = a[i:i+N, :] # put a1 in 1D array form; ravel better than reshape for # non-contiguous arrays. diff --git a/hysop/tools/misc.py b/hysop/tools/misc.py index edc9fee78f362810a85c139e6e33eb01750739b6..22f3b47b499f721dd6c84a8bbe5984787bf297c5 100644 --- a/hysop/tools/misc.py +++ b/hysop/tools/misc.py @@ -91,9 +91,9 @@ def next_pow2(x): def previous_pow2(x): def _previous_pow2(x): assert x>=1 - y = upper_pow2(x)/2 + y = upper_pow2(x)//2 if x==y: - y = upper_pow2(x-1)/2 + y = upper_pow2(x-1)//2 return y if np.isscalar(x): @@ -257,7 +257,7 @@ class WorkSpaceTools(object): # Testing the buffer size instead of shape for i in range(lwork): wk = work[i] - s = wk.size / subsize[i] + s = wk.size // subsize[i] WorkSpaceTools._check_ocl_buffer(s, data_type) result = work diff --git a/hysop/tools/plotDrag.py b/hysop/tools/plotDrag.py index c36a062ce25bed7b5b95379a030c9b45e347108c..4d39e893c4bbaa948f11823fe757d51248e42fb6 100644 --- a/hysop/tools/plotDrag.py +++ b/hysop/tools/plotDrag.py @@ -43,7 +43,7 @@ def _ft_read(fileobj, commentchar='#'): filestr = re.sub(commentline, '', filestr) a = [float(x) for x in filestr.split()] data = np.array(a) - data.shape = (len(a)/shape1, shape1) + data.shape = (len(a)//shape1, shape1) return data diff --git a/hysop/topology/cartesian_descriptor.py b/hysop/topology/cartesian_descriptor.py index 0b8b86a15da94b0d868d1f440909c27dc55bd499..7e7ee3482d63ef1416b95ad79d44f08dc6dd2d11 100644 --- a/hysop/topology/cartesian_descriptor.py +++ b/hysop/topology/cartesian_descriptor.py @@ -19,38 +19,38 @@ class CartesianTopologyDescriptor(TopologyDescriptor): def __init__(self, mpi_params, domain, backend, cartesian_discretization, **kwds): """ Initialize a CartesianTopologyDescriptor. - + Notes ----- kwds allows for backend specific variables. CartesianTopologyDescriptor is immutable. """ - super(CartesianTopologyDescriptor, self).__init__(mpi_params=mpi_params, + super(CartesianTopologyDescriptor, self).__init__(mpi_params=mpi_params, domain=domain, backend=backend, **kwds) - + check_instance(cartesian_discretization, CartesianDiscretization) - - # check cartesian_discretization + + # check cartesian_discretization if (cartesian_discretization.ghosts > 0).any(): msg='No ghost allowed for a topology descriptor.' raise ValueError(msg) - + global_resolution = cartesian_discretization.global_resolution grid_resolution = cartesian_discretization.grid_resolution lboundaries = cartesian_discretization.lboundaries rboundaries = cartesian_discretization.rboundaries - + check_instance(grid_resolution, np.ndarray, size=domain.dim, minval=2) check_instance(global_resolution, np.ndarray, size=domain.dim, minval=2) - check_instance(lboundaries, npw.ndarray, dtype=object, + check_instance(lboundaries, npw.ndarray, dtype=object, size=domain.dim, values=BoundaryCondition, allow_none=True) - check_instance(rboundaries, npw.ndarray, dtype=object, - size=domain.dim, values=BoundaryCondition, + check_instance(rboundaries, npw.ndarray, dtype=object, + size=domain.dim, values=BoundaryCondition, allow_none=True) - - is_lperiodic = (lboundaries==BoundaryCondition.PERIODIC) - is_rperiodic = (rboundaries==BoundaryCondition.PERIODIC) + + is_lperiodic = (lboundaries==BoundaryCondition.PERIODIC) + is_rperiodic = (rboundaries==BoundaryCondition.PERIODIC) assert all((grid_resolution + is_lperiodic) == global_resolution) @@ -64,12 +64,12 @@ class CartesianTopologyDescriptor(TopologyDescriptor): self._cartesian_discretization = cartesian_discretization self._space_step = space_step - + @property def global_resolution(self): """Get the global global_resolution of the discretization (logical grid_size).""" return self._cartesian_discretization.global_resolution - + @property def grid_resolution(self): """Get the global grid resolution of the discretization (effective grid size).""" @@ -79,18 +79,18 @@ class CartesianTopologyDescriptor(TopologyDescriptor): def lboundaries(self): """Get the left boundaries.""" return self._cartesian_discretization.lboundaries - + @property def rboundaries(self): """Get the left boundaries.""" return self._cartesian_discretization.rboundaries - + @property def boundaries(self): """Get left and right boundaries.""" return (self._cartesian_discretization.lboundaries, self._cartesian_discretization.rboundaries) - + @property def space_step(self): """Get the space step.""" @@ -114,17 +114,17 @@ class CartesianTopologyDescriptor(TopologyDescriptor): def __hash__(self): # hash(super(...)) does not work as expected so be call __hash__ directly - h = super(CartesianTopologyDescriptor,self).__hash__() + h = super(CartesianTopologyDescriptor,self).__hash__() h ^= hash(self._cartesian_discretization) return h def __str__(self): return ':CartesianTopologyDescriptor: backend={}, domain={}, grid_resolution={}, bc=[{}]'.format( self.backend, self.domain.full_tag, - self.grid_resolution, + self.grid_resolution, ','.join(('{}/{}'.format( str(lb).replace('HOMOGENEOUS_','')[:3], - str(rb).replace('HOMOGENEOUS_','')[:3]) + str(rb).replace('HOMOGENEOUS_','')[:3]) for (lb,rb) in zip(*self.boundaries)))) @classmethod @@ -152,17 +152,17 @@ class CartesianTopologyDescriptor(TopologyDescriptor): msg+='they will be automatically determined from continuous fields.' raise ValueError(msg) global_resolution = handle.resolution - else: + else: global_resolution = handle cartesian_discretization = CartesianDiscretization(resolution=global_resolution, lboundaries=field.lboundaries_kind, rboundaries=field.rboundaries_kind, ghosts=None) - + kwds.setdefault('mpi_params', operator.mpi_params) kwds.setdefault('domain', field.domain) - return CartesianTopologyDescriptor(backend=backend, + return CartesianTopologyDescriptor(backend=backend, cartesian_discretization = cartesian_discretization, **kwds) elif isinstance(handle, CartesianTopologyDescriptor): @@ -171,14 +171,14 @@ class CartesianTopologyDescriptor(TopologyDescriptor): # handle is a CartesianTopology instance, ghosts and boundary conditions # can be imposed freely by user here. return handle - + def choose_topology(self, known_topologies, **kwds): """ Find optimal topology parameters from known_topologies. If None is returned, create_topology will be called instead. """ if known_topologies: - ordered_topologies = sorted(known_topologies, + ordered_topologies = sorted(known_topologies, key=lambda topo: sum(topo.ghosts)) return ordered_topologies[0] else: @@ -191,19 +191,19 @@ class CartesianTopologyDescriptor(TopologyDescriptor): by operators on variables and solved during operator's method get_field_requirements(). """ - discretization = CartesianDiscretization(resolution=self.grid_resolution, - lboundaries=self.lboundaries, + discretization = CartesianDiscretization(resolution=self.grid_resolution, + lboundaries=self.lboundaries, rboundaries=self.rboundaries, ghosts=ghosts) - return CartesianTopology(domain=self.domain, - discretization=discretization, + return CartesianTopology(domain=self.domain, + discretization=discretization, mpi_params=self.mpi_params, - cutdirs=cutdirs, - backend=self.backend, + cutdirs=cutdirs, + backend=self.backend, **self.extra_kwds) -CartesianTopologyDescriptors = (CartesianTopology, CartesianTopologyDescriptor, CartesianDiscretization, +CartesianTopologyDescriptors = (CartesianTopology, CartesianTopologyDescriptor, CartesianDiscretization, tuple, list, np.ndarray, type(None)) """ Instance of those types can be used to create a CartesianTopologyDescriptor. diff --git a/hysop_examples/examples/analytic/analytic.py b/hysop_examples/examples/analytic/analytic.py index be9bddad40836793a8e8c3f04f04c6443ede93c1..4c27e7253d624673531d6558a047f95fca9d6e35 100755 --- a/hysop_examples/examples/analytic/analytic.py +++ b/hysop_examples/examples/analytic/analytic.py @@ -110,9 +110,9 @@ def compute(args): max_iter=args.max_iter, times_of_interest=args.times_of_interest, t=t) - + # Finally solve the problem - problem.solve(simu, dry_run=args.dry_run, + problem.solve(simu, dry_run=args.dry_run, debug_dumper=args.debug_dumper, checkpoint_handler=args.checkpoint_handler) diff --git a/hysop_examples/examples/flow_around_sphere/flow_around_sphere.py b/hysop_examples/examples/flow_around_sphere/flow_around_sphere.py index 858e4bbf71e186cd381554276b68ccc966240156..059ff3c8196c8e9fe6064d8d1d1d740aed835a85 100644 --- a/hysop_examples/examples/flow_around_sphere/flow_around_sphere.py +++ b/hysop_examples/examples/flow_around_sphere/flow_around_sphere.py @@ -34,7 +34,7 @@ def compute(args): cfl = args.cfl lcfl = args.lcfl uinf = 1.0 - viscosity = 1. / 250. + viscosity = 1.0 / 250 outfreq = args.dump_freq dt0 = args.dt @@ -52,18 +52,18 @@ def compute(args): elif (impl is Implementation.OPENCL): # For the OpenCL implementation we need to setup the compute device # and configure how the code is generated and compiled at runtime. - + # Create an explicit OpenCL context from user parameters from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number cl_env = get_or_create_opencl_env( mpi_params=mpi_params, - platform_id=args.cl_platform_id, + platform_id=args.cl_platform_id, device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None) - + # Configure OpenCL kernel generation and tuning (already done by HysopArgParser) from hysop.methods import OpenClKernelConfig method = { OpenClKernelConfig: args.opencl_kernel_config } - + # Setup opencl specific extra operator keyword arguments extra_op_kwds['cl_env'] = cl_env else: @@ -171,7 +171,7 @@ def compute(args): advec_dir = DirectionalAdvection( implementation=impl, name='advec', - velocity = velo, + velocity = velo, advected_fields = (vorti, ), velocity_cfl = args.cfl, variables = {velo: npts, vorti: npts}, @@ -182,7 +182,7 @@ def compute(args): else: StretchOp = StaticDirectionalStretching stretch = StretchOp( - implementation=Implementation.PYTHON if implIsFortran else impl, + implementation=Implementation.PYTHON if implIsFortran else impl, name='stretch', formulation=StretchingFormulation.CONSERVATIVE, velocity=velo, @@ -314,13 +314,13 @@ def compute(args): simu.write_parameters(t, dt_cfl, dt_advec, dt, enstrophy, flowrate, min_max_U.Finf, min_max_W.Finf, adapt_dt.equivalent_CFL, filename='parameters.txt', precision=8) - + problem.initialize_field(vorti, formula=computeVort) problem.initialize_field(velo, formula=computeVel) problem.initialize_field(sphere, formula=computeSphere) # Finally solve the problem - problem.solve(simu, dry_run=args.dry_run, + problem.solve(simu, dry_run=args.dry_run, debug_dumper=args.debug_dumper, checkpoint_handler=args.checkpoint_handler) diff --git a/hysop_examples/examples/shear_layer/shear_layer.py b/hysop_examples/examples/shear_layer/shear_layer.py index 05bbe3f7c68bce6009cd3fa69975618107e557cf..39cea288a91858e25057efa3831c2e71f9857424 100644 --- a/hysop_examples/examples/shear_layer/shear_layer.py +++ b/hysop_examples/examples/shear_layer/shear_layer.py @@ -1,6 +1,6 @@ ## HySoP Example: Shear Layer 2D -## See Brown 1995: +## See Brown 1995: ## Performance of under-resolved two dimensional incompressible flow simulations import sympy as sm import numpy as np @@ -21,16 +21,16 @@ def compute(args): ComputeGranularity, Interpolation from hysop.symbolic import space_symbols - + # Define the domain dim = args.ndim npts = args.npts box = Box(origin=args.box_origin, length=args.box_length, dim=dim) - + # Get default MPI Parameters from domain (even for serial jobs) mpi_params = MPIParams(comm=box.task_comm, task_id=box.current_task()) - + # Setup usual implementation specific variables impl = args.impl extra_op_kwds = { 'mpi_params': mpi_params } @@ -39,30 +39,30 @@ def compute(args): elif (impl is Implementation.OPENCL): # For the OpenCL implementation we need to setup the compute device # and configure how the code is generated and compiled at runtime. - + # Create an explicit OpenCL context from user parameters from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number cl_env = get_or_create_opencl_env( mpi_params=mpi_params, - platform_id=args.cl_platform_id, + platform_id=args.cl_platform_id, device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None) - + # Configure OpenCL kernel generation and tuning (already done by HysopArgParser) from hysop.methods import OpenClKernelConfig method = { OpenClKernelConfig: args.opencl_kernel_config } - + # Setup opencl specific extra operator keyword arguments extra_op_kwds['cl_env'] = cl_env else: msg='Unknown implementation \'{}\'.'.format(impl) raise ValueError(msg) - - # Get back user paramaters + + # Get back user paramaters # rho: thickness of the shear layers # delta: the strength of the initial perturbation rho = args.rho delta = args.delta - + # Compute initial vorticity fomula symbolically # and define the function to compute initial vorticity. (x,y) = space_symbols[:2] @@ -91,7 +91,7 @@ def compute(args): velo = VelocityField(domain=box, dtype=args.dtype) vorti = VorticityField(velocity=velo) nu = ViscosityParameter(initial_value=args.nu, const=True, dtype=args.dtype) - + ### Build the directional operators if (impl is Implementation.FORTRAN) and (dim==3): #> Nd advection @@ -103,11 +103,11 @@ def compute(args): variables = {velo: npts, vorti: npts}, dt=dt, **extra_op_kwds) else: - #> Directional advection + #> Directional advection impl_advec = Implementation.PYTHON if (impl is Implementation.FORTRAN) else impl advec_dir = DirectionalAdvection(implementation=impl_advec, name='advection_remesh', - velocity = velo, + velocity = velo, advected_fields = (vorti,), velocity_cfl = args.cfl, variables = {velo: npts, vorti: npts}, @@ -119,22 +119,22 @@ def compute(args): ### Build standard operators #> Poisson operator to recover the velocity from the vorticity - poisson = PoissonCurl(name='poisson_curl', - velocity=velo, vorticity=vorti, - variables={velo:npts, vorti: npts}, + poisson = PoissonCurl(name='poisson_curl', + velocity=velo, vorticity=vorti, + variables={velo:npts, vorti: npts}, projection=args.reprojection_frequency, diffusion=nu, dt=dt, - implementation=impl, + implementation=impl, enforce_implementation=args.enforce_implementation, **extra_op_kwds) #> We ask to dump the inputs and the outputs of this operator poisson.dump_outputs(fields=(vorti,), io_params=args.io_params.clone(filename='vorti'), **extra_op_kwds) - poisson.dump_outputs(fields=(velo,), + poisson.dump_outputs(fields=(velo,), io_params=args.io_params.clone(filename='velo'), **extra_op_kwds) - + #> Operator to compute the infinite norm of the velocity if (impl is Implementation.FORTRAN): impl = Implementation.PYTHON @@ -145,19 +145,19 @@ def compute(args): min_max_W = MinMaxFieldStatistics(name='min_max_W', field=vorti, Finf=True, implementation=impl, variables={vorti:npts}, **extra_op_kwds) - + ### Adaptive timestep operator adapt_dt = AdaptiveTimeStep(dt) adapt_dt.push_cfl_criteria(cfl=args.cfl, Finf=min_max_U.Finf) - adapt_dt.push_advection_criteria(lcfl=args.lcfl, Finf=min_max_W.Finf, + adapt_dt.push_advection_criteria(lcfl=args.lcfl, Finf=min_max_W.Finf, criteria=AdvectionCriteria.W_INF) - ## Create the problem we want to solve and insert our + ## Create the problem we want to solve and insert our # directional splitting subgraph and the standard operators. # The method dictionnary passed to this graph will be dispatched # accross all operators contained in the graph. method.update( - { + { ComputeGranularity: args.compute_granularity, SpaceDiscretization: args.fd_order, TimeIntegrator: args.time_integrator, @@ -167,31 +167,31 @@ def compute(args): problem = Problem(method=method) problem.insert(poisson, advec, min_max_U, min_max_W, adapt_dt) problem.build(args) - + # If a visu_rank was provided, and show_graph was set, # display the graph on the given process rank. if args.display_graph: problem.display(args.visu_rank) - + # Create a simulation # (do not forget to specify the t and dt parameters here) - simu = Simulation(start=args.tstart, end=args.tend, + simu = Simulation(start=args.tstart, end=args.tend, nb_iter=args.nb_iter, max_iter=args.max_iter, dt0=args.dt, times_of_interest=args.times_of_interest, t=t, dt=dt) simu.write_parameters(t, dt, filename='parameters.txt', precision=4) - + # Initialize only the vorticity problem.initialize_field(velo, formula=init_velocity) problem.initialize_field(vorti, formula=init_vorticity) - - # Finally solve the problem - problem.solve(simu, dry_run=args.dry_run, + + # Finally solve the problem + problem.solve(simu, dry_run=args.dry_run, debug_dumper=args.debug_dumper, checkpoint_handler=args.checkpoint_handler, plot_freq=args.plot_freq) - + # Finalize problem.finalize() @@ -202,7 +202,7 @@ if __name__=='__main__': class ShearLayerArgParser(HysopArgParser): def __init__(self): prog_name = 'shear_layer' - default_dump_dir = '{}/hysop_examples/{}'.format(HysopArgParser.tmp_dir(), + default_dump_dir = '{}/hysop_examples/{}'.format(HysopArgParser.tmp_dir(), prog_name) description=colors.color('HySoP Shear Layer Example: ', fg='blue', style='bold') @@ -226,7 +226,7 @@ if __name__=='__main__': description+='\n' description+='\nSee the original paper at ' description+='http://crd.lbl.gov/assets/pubs_presos/underIIJCP.pdf.' - + super(ShearLayerArgParser, self).__init__( prog_name=prog_name, description=description, @@ -260,7 +260,7 @@ if __name__=='__main__': msg='Case parameter should be 0, 1 or 2, got {}.' msg=msg.format(args.case) self.error(msg) - + def _add_graphical_io_args(self): graphical_io = super(ShearLayerArgParser, self)._add_graphical_io_args() graphical_io.add_argument('-pw', '--plot-vorticity', action='store_true', @@ -268,17 +268,17 @@ if __name__=='__main__': help=('Plot the vorticity component during simulation. '+ 'Simulation will stop at each time of interest and '+ 'the plot will be updated every specified freq iterations.')) - graphical_io.add_argument('-pf', '--plot-freq', type=int, default=10, + graphical_io.add_argument('-pf', '--plot-freq', type=int, default=10, dest='plot_freq', - help=('Plot frequency in terms of iterations.' + help=('Plot frequency in terms of iterations.' +' Use 0 to disable frequency based plotting.')) - + def _check_file_io_args(self, args): super(ShearLayerArgParser, self)._check_file_io_args(args) self._check_default(args, 'plot_vorticity', bool, allow_none=False) self._check_default(args, 'plot_freq', int, allow_none=False) self._check_positive(args, 'plot_freq', strict=False, allow_none=False) - + def _setup_parameters(self, args): super(ShearLayerArgParser, self)._setup_parameters(args) from hysop.tools.types import first_not_None @@ -293,7 +293,7 @@ if __name__=='__main__': args.nu = first_not_None(args.nu, nu_defaults[case]) self._check_positive(args, ('rho','delta','nu'), strict=True, allow_none=False) - + if (args.ndim != 2): msg='This example only works for 2D domains.' self.error(msg) @@ -301,8 +301,8 @@ if __name__=='__main__': parser = ShearLayerArgParser() parser.set_defaults(impl='cl', ndim=2, npts=(256,), - box_origin=(0.0,), box_length=(1.0,), - tstart=0.0, tend=1.25, + box_origin=(0.0,), box_length=(1.0,), + tstart=0.0, tend=1.25, dt=1e-4, cfl=0.5, lcfl=0.125, case=0, dump_freq=0, dump_times=(0.8, 1.20)) diff --git a/opencl_explore.py b/opencl_explore.py index 65024da3611f2e2046113f8d53d74a891ebfabcc..85060df8f6fe752bb349ba95cf6036c22967f5c4 100644 --- a/opencl_explore.py +++ b/opencl_explore.py @@ -11,7 +11,7 @@ def size_human(s): c = ['b', 'Kb', 'Mb', 'Gb'] i = 0 while s >= 1024: - s /= 1024 + s //= 1024 i += 1 return str(int(s)) + c[i]