From 12a4e5e3d13509475e3214a13854bdabe29c7733 Mon Sep 17 00:00:00 2001 From: Keck Jean-Baptiste <jean-baptiste.keck@imag.fr> Date: Sun, 21 May 2017 13:43:38 +0200 Subject: [PATCH] fixed codegen array variable --- hysop/backend/device/codegen/base/codegen.py | 31 ++-- .../backend/device/codegen/base/variables.py | 135 ++++++++++++++---- .../codegen/kernels/directional_remesh.py | 99 +++++-------- .../device/opencl/opencl_array_backend.py | 4 +- hysop/deps.py | 2 +- 5 files changed, 165 insertions(+), 106 deletions(-) diff --git a/hysop/backend/device/codegen/base/codegen.py b/hysop/backend/device/codegen/base/codegen.py index a4b397e59..0d313e3e8 100644 --- a/hysop/backend/device/codegen/base/codegen.py +++ b/hysop/backend/device/codegen/base/codegen.py @@ -2,12 +2,11 @@ from contextlib import contextmanager from subprocess import call -import sys, os, string, tempfile, operator -import itertools as it - -import pyopencl as cl - +from hysop.tools.types import check_instance +from hysop.deps import it, sys, os, string, tempfile, operator +from hysop.backend.device.opencl import cl from hysop.backend.device.codegen.base.utils import WriteOnceDict, VarDict +from hysop.backend.device.codegen.base.variables import CodegenVariable class CodeGenerator(object): @@ -249,13 +248,25 @@ class CodeGenerator(object): else: self.append(code) - def decl_vars(self, *variables): + def decl_vars(self, *variables, **kargs): assert len(set(var.base_ctype() for var in variables))==1 base= variables[0].base_ctype() svars=[] for var in variables: - svars.append(var.declare(multidecl=True)) - return '{} {};'.format(base, ', '.join(svars)) + check_instance(var, CodegenVariable) + svars.append(var.declare(multidecl=True, **kargs)) + decl = '{} {};'.format(base, ', '.join(svars)) + self.append(decl) + + def decl_aligned_vars(self, *variables, **kargs): + assert len(variables)>0 + jmp = kargs.pop('jmp', True) + with self._align_() as al: + for var in variables: + check_instance(var, CodegenVariable) + var.declare(codegen=al, align=True, **kargs) + if jmp: + al.jumpline() class VarBlock(object): @@ -335,7 +346,9 @@ class CodeGenerator(object): def code(self): if self._parts_count is None: - raise RuntimeError('Call at least one append() before closing an _align_!') + msg='Call at least one append() before closing an _align_!' + msg+='\n got: {}'.format(self._lines) + raise RuntimeError(msg) maxlen = lambda i: max([len(line[i]) for line in self._lines if len(line)>1]) line_str = '' diff --git a/hysop/backend/device/codegen/base/variables.py b/hysop/backend/device/codegen/base/variables.py index 430840bc2..3d454ea62 100644 --- a/hysop/backend/device/codegen/base/variables.py +++ b/hysop/backend/device/codegen/base/variables.py @@ -82,11 +82,6 @@ class CodegenVariable(object): check_instance(add_impl_const, bool) check_instance(nl, bool, allow_none=True) - if const and add_impl_const: - msg='Variable {} is const and add_impl_const has been specified!' - msg=msg.format(name) - raise ValueError(msg) - self.name = name self.ctype = ctype self.typegen = typegen @@ -158,6 +153,20 @@ class CodegenVariable(object): self.value = value self.svalue = svalue self.init=init + + # check + if add_impl_const: + if (not is_ptr): + if const: + msg='Variable {} is const and add_impl_const has been specified!' + msg=msg.format(name) + raise ValueError(msg) + else: + if ptr_const[-1]: + msg='Variable {} has ptr_const[-1]=True and add_impl_const has been specified!' + msg=msg.format(name) + raise ValueError(msg) + def newvar(name, nl=False, storage=None, value=None, svalue=None, init=None, @@ -219,7 +228,8 @@ class CodegenVariable(object): def base_ctype(self, storage=None, ctype=None, const=None, volatile=False, - impl=True, align=False): + impl=True, align=False, + add_impl_const=None): storage = self.storage if (storage is None) else storage ctype = self.ctype if (ctype is None) else ctype volatile = self.volatile if (volatile is None) else volatile @@ -227,9 +237,9 @@ class CodegenVariable(object): if (const is None): const = self.const if impl and (not self.is_ptr) and (not const): - const = self.add_impl_const + const = self.add_impl_const if (add_impl_const is None) else add_impl_const - base_ctype = '{storage}${ctype}${const}${volatile}'.format( + base_ctype = '{storage}${const}${volatile}${ctype}'.format( storage='{} '.format(storage) if (storage is not None) else '', const='const ' if const else '', volatile='volatile ' if volatile else '', @@ -237,20 +247,15 @@ class CodegenVariable(object): if not align: base_ctype = base_ctype.replace('$','') return base_ctype.strip() - - def full_ctype(self, storage=None, ctype=None, const=None, volatile=False, - impl=True, multidecl=False, align=False, cast=False): - if multidecl: - base_ctype = '' - else: - base_ctype = self.base_ctype(storage,ctype,const,volatile,impl,align) - + + def ptr_ctype(self, impl=True, add_impl_const=None, cast=False): if self.is_ptr: ptrs=[] + add_impl_const = self.add_impl_const if (add_impl_const is None) else add_impl_const for i, (c,v,r) in enumerate(zip(self.ptr_const, self.ptr_volatile, self.ptr_restrict)): if i==self.ptr_level-1: - c = c or (impl and self.add_impl_const) + c = c or (impl and add_impl_const) ptr=' $*{const}${volatile}${restrict}'.format( const = 'const ' if (c and not cast) else '', volatile = 'volatile ' if (v and not cast) else '', @@ -259,6 +264,22 @@ class CodegenVariable(object): ptr_ctype = ''.join(ptrs) else: ptr_ctype='' + return ptr_ctype + + def full_ctype(self, storage=None, ctype=None, const=None, volatile=False, + impl=True, multidecl=False, align=False, cast=False, + add_impl_const=None): + + if multidecl: + base_ctype = '' + else: + base_ctype = self.base_ctype(storage,ctype,const,volatile,impl,align, + add_impl_const=add_impl_const) + if len(base_ctype)==0: + msg= 'Failed to get base ctype in {}.'.format(self.__class__) + raise RuntimeError(msg) + + ptr_ctype = self.ptr_ctype(impl=impl, add_impl_const=add_impl_const, cast=cast) full_ctype = '{}{}'.format(base_ctype, ptr_ctype) if not align: @@ -316,27 +337,39 @@ class CodegenVariable(object): acc += '->' if self.is_ptr else '.' return acc + def decl_name(self): + return self.name + def declare(self, codegen=None, align=False, - multidecl=False, const=None, init=None): - - ctype = self.full_ctype(align=align,multidecl=multidecl,const=const) + multidecl=False, const=None, init=None, + compact=False): + # const means add_impl_const, ie. declare current variable as constant (not pointed types) + ctype = self.full_ctype(align=align,multidecl=multidecl,add_impl_const=const) + if (not multidecl) and len(ctype)==0: + msg= 'Failed to get full ctype in {}.'.format(self.__class__) + raise RuntimeError(msg) # static array ctype needs to be split - split = ctype.split('[',1) - ctype = split[0] - is_static_array = len(split)==2 - name_suffix = ('['+split[1]) if is_static_array else '' - name = self.name+name_suffix + name = self.decl_name() init = init if (init is not None) else self.init - code = '{} ${}'.format(ctype, name) + if (len(ctype)>0) and ctype[-1]=='*': + code = '{}${}'.format(ctype, name) + else: + code = '{} ${}'.format(ctype, name) if (init is not None): - code = '{} $= {}'.format(code,init) + if compact: + code = '{}={}'.format(code,init) + else: + code = '{} $= {}'.format(code,init) elif self.known(): self.force_symbolic(False) sval = self.sval() - code = '{} $= {}'.format(code,sval) + if compact: + code = '{}={}'.format(code,sval) + else: + code = '{} $= {}'.format(code,sval) if not multidecl: code+=';' @@ -347,7 +380,7 @@ class CodegenVariable(object): code = code.replace('$','') if codegen is not None: codegen.append(code) - return code + return code.strip() def __getitem__(self,ss): if self.is_ptr: @@ -499,6 +532,46 @@ class CodegenArray(CodegenVariable): symbolic_mode=symbolic_mode, struct_var=struct_var) self.shape = shape self.sshape = sshape + + def decl_name(self): + if self.shape: + static_array = ['[{}]'.format(val) for val in self.shape] + elif self.shape: + static_array = ['[{}]'.format(val) for val in self.sshape] + else: + static_array = [] + return '{}{}'.format(self.name, ''.join(static_array)) + + def array_dim(self): + if (self.shape is not None): + return self.shape.size + if (self.sshape is not None): + return self.sshape.size + msg='unknown array dim.' + raise RuntimeError(msg) + + def ptr_ctype(self, impl=True, add_impl_const=None, cast=False): + if self.is_ptr: + add_impl_const = self.add_impl_const if (add_impl_const is None) else add_impl_const + + dim = self.array_dim() + ptr_const = self.ptr_const[dim:] + ptr_volatile = self.ptr_volatile[dim:] + ptr_restrict = self.ptr_restrict[dim:] + + ptrs=[] + for i, (c,v,r) in enumerate(zip(ptr_const, ptr_volatile, ptr_restrict)): + if i==self.ptr_level-1: + c = c or (impl and add_impl_const) + ptr=' $*{const}${volatile}${restrict}'.format( + const = 'const ' if (c and not cast) else '', + volatile = 'volatile ' if (v and not cast) else '', + restrict = 'restrict ' if (r and not cast) else '') + ptrs.append(ptr) + ptr_ctype = ''.join(ptrs) + else: + ptr_ctype='' + return ptr_ctype @@ -663,14 +736,14 @@ class CodegenVectorClBuiltin(CodegenVector): else: raise TypeError, 'Invalid key type!' - def declare(self,codegen=None,align=False,const=None,init=None): + def declare(self, init=None, **kargs): if isinstance(init,int): init = ','.join([self.typegen.dump(init) for _ in xrange(self.dim)]) init = '({})({})'.format(self.ctype,init) elif init.__class__ in [list,tuple,np.ndarray]: init = ','.join([self.typegen.dump(init[i]) for i in xrange(self.dim)]) init = '({})({})'.format(self.ctype,init) - return super(CodegenVectorClBuiltin,self).declare(codegen=codegen,align=align,const=const,init=init) + return super(CodegenVectorClBuiltin,self).declare(init=init, **kargs) class CodegenVectorClBuiltinFunc(CodegenVectorClBuiltin): def __init__(self,fname,name,btype,dim,typegen, diff --git a/hysop/backend/device/codegen/kernels/directional_remesh.py b/hysop/backend/device/codegen/kernels/directional_remesh.py index 2aee3c016..2199743e9 100644 --- a/hysop/backend/device/codegen/kernels/directional_remesh.py +++ b/hysop/backend/device/codegen/kernels/directional_remesh.py @@ -43,27 +43,26 @@ from hysop.constants import DirectionLabels class DirectionalRemeshKernel(KernelCodeGenerator): @staticmethod - def codegen_name(work_dim, direction, + def codegen_name(work_dim, remesh_kernel, ftype, nparticles, nscalars, remesh_criteria_eps, use_atomics, is_inplace): inplace = 'inplace_' if is_inplace else '' atomic = 'atomic_' if use_atomics else '' - criteria = '{}eps__' if (remesh_criteria_eps is not None) else '' - return 'directional_{}{}remesh_{}d__lambda_{}_{}__{}__{}p__{}s__{}{}'.format( + criteria = '{}eps__' if (remesh_criteria_eps is not None) else 'full' + return 'directional_{}{}remesh_{}d__lambda_{}_{}__{}__{}p__{}s__{}'.format( inplace, atomic, work_dim, remesh_kernel.n, remesh_kernel.r, ftype, nparticles, nscalars, - criteria, - DirectionLabels[direction]) + criteria) @staticmethod def cache_ghosts(scalar_cfl, remesh_kernel): assert remesh_kernel.n % 2 == 0 return int(1+math.ceil(scalar_cfl)+remesh_kernel.n/2) - def __init__(self, typegen, work_dim, direction, ftype, + def __init__(self, typegen, work_dim, ftype, nparticles, nscalars, sboundary, is_inplace, scalar_cfl, remesh_kernel, remesh_criteria_eps=None, @@ -73,7 +72,6 @@ class DirectionalRemeshKernel(KernelCodeGenerator): known_vars = None): assert work_dim>0 and work_dim<=3 - assert direction>=0 and direction<work_dim assert nscalars>0 assert nparticles in [1,2,4,8,16] check_instance(sboundary[0],BoundaryCondition) @@ -92,7 +90,7 @@ class DirectionalRemeshKernel(KernelCodeGenerator): vftype = tg.vtype(ftype, nparticles) vitype = tg.vtype(itype, nparticles) - name = DirectionalRemeshKernel.codegen_name(work_dim, direction, + name = DirectionalRemeshKernel.codegen_name(work_dim, remesh_kernel, ftype, nparticles,nscalars, remesh_criteria_eps, use_atomics, is_inplace) @@ -122,7 +120,6 @@ class DirectionalRemeshKernel(KernelCodeGenerator): self.vitype = vitype self.vftype = vftype self.work_dim = work_dim - self.direction = direction self.sboundary = sboundary self.nparticles = nparticles self.nscalars = nscalars @@ -154,22 +151,22 @@ class DirectionalRemeshKernel(KernelCodeGenerator): kargs = ArgDict() self.position = OpenClArrayBackend.build_codegen_argument(kargs, name='position', storage=self._global, ctype=ftype, typegen=typegen, - ptr_restrict=True, ptr_const=True) + ptr_restrict=True, const=True) if is_inplace: self.scalars_in = tuple( - OpenClArrayBackend.build_codegen_argument(kargs, name=' s{}_in'.format(i), + OpenClArrayBackend.build_codegen_argument(kargs, name='S{}_inout'.format(i), storage=self._global, ctype=ftype, typegen=typegen, - ptr_restrict=True, ptr_const=False) for i in xrange(nscalars)) + const=False, ptr_restrict=True) for i in xrange(nscalars)) self.scalars_out = self.scalars_in else: self.scalars_in = tuple( - OpenClArrayBackend.build_codegen_argument(kargs, name='s{}_in'.format(i), + OpenClArrayBackend.build_codegen_argument(kargs, name='S{}_in'.format(i), storage=self._global, ctype=ftype, typegen=typegen, - ptr_restrict=True, ptr_const=True) for i in xrange(nscalars)) + const=True, ptr_restrict=True) for i in xrange(nscalars)) self.scalars_out = tuple( - OpenClArrayBackend.build_codegen_argument(kargs, name='s{}_out'.format(i), + OpenClArrayBackend.build_codegen_argument(kargs, name='S{}_out'.format(i), storage=self._global, ctype=ftype, typegen=typegen, - ptr_restrict=True, ptr_const=False) for i in xrange(nscalars)) + const=False, ptr_restrict=True) for i in xrange(nscalars)) if debug_mode: kargs['dbg0'] = CodegenVariable(storage=self._global,name='dbg0',ctype=itype, @@ -233,7 +230,7 @@ class DirectionalRemeshKernel(KernelCodeGenerator): scalars_out = s.scalars_out compute_grid_size = position_mesh_info['local_mesh']['compute_resolution'].view( - 'compute_grid_size',slice(None,work_dim)) + 'compute_grid_size',slice(None,work_dim),const=True) position_grid_ghosts = position_mesh_info['ghosts'].view( 'pos_grid_ghosts', slice(0,work_dim), const=True) @@ -333,59 +330,36 @@ class DirectionalRemeshKernel(KernelCodeGenerator): yield ctx except: raise - - nested_loops = [_work_iterate_(i) for i in xrange(dim-1,-1,-1)] - + + with s._kernel_(): s.jumpline() - with s._align_() as al: - position.declare(al,const=True,align=True) - for i in xrange(nscalars): - scalars_in[i].declare(al,const=True,align=True) - if not is_inplace: - for i in xrange(nscalars): - scalars_out[i].declare(al,const=False,align=True) - s.jumpline() - with s._align_() as al: - local_id.declare(al,align=True,const=True) - global_size.declare(al,align=True,const=True) - local_size.declare(al,align=True,const=True) - s.jumpline() - with s._align_() as al: - xmin.declare(al,align=True) - inv_dx.declare(al,align=True) - dx.declare(al,align=True) - s.jumpline() + if is_inplace: + s.decl_aligned_vars(*((position,)+scalars_in)) + else: + s.decl_aligned_vars(*((position,)+scalars_in+scalars_out)) - with s._align_() as al: - npart.declare(al, const=True, align=True) - cache_ghosts.declare(al,const=True, align=True) - cache_width.declare(al,const=True, align=True) - local_work.declare(al,const=True, align=True) - s.jumpline() + s.decl_aligned_vars(local_id, global_size, local_size, const=True) - compute_grid_size.declare(s,const=True) - s.jumpline() - with s._align_() as al: - position_grid_ghosts.declare(al,align=True) - for sgg in scalars_grid_ghosts: - sgg.declare(al,align=True) - s.jumpline() + s.decl_aligned_vars(xmin, inv_dx, dx, const=True) - with s._align_() as al: - position_global_id.declare(al,align=True) - for sgid in scalars_global_id: - sgid.declare(al,align=True) - s.jumpline() + s.decl_aligned_vars(npart, cache_ghosts, cache_width, local_work, const=True) - #with s._align_() as al: - #for var in cached_scalars: - #var.declare(al,align=True); - s.decl_vars(*cached_scalars) + s.decl_aligned_vars(compute_grid_size, position_grid_ghosts, *scalars_grid_ghosts) + + s.decl_vars(position_global_id, *scalars_global_id) s.jumpline() - + + if local_size_known: + s.decl_vars(*cached_scalars) + s.jumpline() + else: + s.decl_aligned_vars(*cached_scalars) + + + nested_loops = [_work_iterate_(i) for i in xrange(dim-1,-1,-1)] with contextlib.nested(*nested_loops): # s.barrier(_local=True) # s.jumpline() @@ -411,7 +385,6 @@ if __name__ == '__main__': dak = DirectionalRemeshKernel(typegen=tg, ftype=tg.fbtype, work_dim=work_dim, - direction=2, nparticles=4, nscalars=2, remesh_kernel=kernel, @@ -424,7 +397,7 @@ if __name__ == '__main__': s0_mesh_info=smesh_info, s1_mesh_info=smesh_info, position_mesh_info=pmesh_info, - local_size=local_size[:work_dim], + #local_size=local_size[:work_dim], global_size=global_size[:work_dim] ) ) diff --git a/hysop/backend/device/opencl/opencl_array_backend.py b/hysop/backend/device/opencl/opencl_array_backend.py index d53ee9498..47bfa8fa2 100644 --- a/hysop/backend/device/opencl/opencl_array_backend.py +++ b/hysop/backend/device/opencl/opencl_array_backend.py @@ -2993,8 +2993,8 @@ class OpenClArrayBackend(ArrayBackend): typegen=typegen, ctype=itype, add_impl_const=True, nl=True) - char_alias = args[base].full_ctype(ctype='char', cast=True) - ctype_alias = args[base].full_ctype(cast=True) + char_alias = args[base].full_ctype(ctype='char', cast=True, align=True) + ctype_alias = args[base].full_ctype(cast=True, align=True) init = '({})(({})({})+{})'.format(ctype_alias, char_alias, base, offset) var = CodegenVariable(name=name, typegen=typegen, diff --git a/hysop/deps.py b/hysop/deps.py index 6f41dbbcb..342a42e5c 100644 --- a/hysop/deps.py +++ b/hysop/deps.py @@ -21,7 +21,7 @@ except ImportError as e: print(msg) import sys, os, subprocess, platform -import resource, psutil +import resource, psutil, tempfile import inspect, functools, operator import hashlib, gzip, copy, types, string import math, re, contextlib -- GitLab