diff --git a/hysop/backend/device/codegen/base/codegen.py b/hysop/backend/device/codegen/base/codegen.py index 0d313e3e82a795fbdabe2f52122bfaa259b6c9a2..d93e69142ac3e4cd0ee6a10fefb8b4fe340ce5f4 100644 --- a/hysop/backend/device/codegen/base/codegen.py +++ b/hysop/backend/device/codegen/base/codegen.py @@ -249,24 +249,34 @@ class CodeGenerator(object): self.append(code) def decl_vars(self, *variables, **kargs): + codegen = kargs.pop('codegen', None) + align = kargs.pop('align', False) assert len(set(var.base_ctype() for var in variables))==1 base= variables[0].base_ctype() svars=[] for var in variables: check_instance(var, CodegenVariable) svars.append(var.declare(multidecl=True, **kargs)) - decl = '{} {};'.format(base, ', '.join(svars)) - self.append(decl) + decl = '{} ${};'.format(base, ', '.join(svars)) + if not align: + decl = decl.replace('$','') + if (codegen is None): + self.append(decl) + else: + codegen.append(decl) def decl_aligned_vars(self, *variables, **kargs): assert len(variables)>0 jmp = kargs.pop('jmp', True) - with self._align_() as al: - for var in variables: - check_instance(var, CodegenVariable) - var.declare(codegen=al, align=True, **kargs) - if jmp: - al.jumpline() + try: + with self._align_() as al: + for var in variables: + check_instance(var, CodegenVariable) + var.declare(codegen=al, align=True, **kargs) + if jmp: + al.jumpline() + except: + raise class VarBlock(object): diff --git a/hysop/backend/device/codegen/base/utils.py b/hysop/backend/device/codegen/base/utils.py index ee926da994be988509a1a5e8f51f590ad9082194..067786d87e1ef686e9a7876bbccdb868d029c76e 100644 --- a/hysop/backend/device/codegen/base/utils.py +++ b/hysop/backend/device/codegen/base/utils.py @@ -1,6 +1,5 @@ import hashlib - class WriteOnceDict(dict): def __init__(self,**kargs): super(WriteOnceDict,self).__init__(**kargs) diff --git a/hysop/backend/device/codegen/base/variables.py b/hysop/backend/device/codegen/base/variables.py index 3d454ea62c9eaa21bccdf9487c5e9c83f66f89b2..0eb544376ba7fd5470272084b7101889770d71e5 100644 --- a/hysop/backend/device/codegen/base/variables.py +++ b/hysop/backend/device/codegen/base/variables.py @@ -351,8 +351,13 @@ class CodegenVariable(object): # static array ctype needs to be split name = self.decl_name() - - init = init if (init is not None) else self.init + + if init==False: + msg='Const variable should be initialized at declaration.' + assert (not self.const) or (not self.add_impl_const), msg + init=None + else: + init = init if (init is not None) else self.init if (len(ctype)>0) and ctype[-1]=='*': code = '{}${}'.format(ctype, name) else: @@ -382,6 +387,22 @@ class CodegenVariable(object): codegen.append(code) return code.strip() + def affect(self, codegen=None, align=False, init=None, + compact=False): + msg='Cannot affect a const variable.' + assert (not self.const) or (not self.add_impl_const), msg + init = self.init if (init is None) else init + if compact: + code = '{}$={};' + else: + code = '{} $= {};' + code = code.format(self, init) + if not align: + code = code.replace('$','') + if codegen is not None: + codegen.append(code) + return code + def __getitem__(self,ss): if self.is_ptr: return '{}[{}]'.format(self.name,ss) @@ -736,14 +757,14 @@ class CodegenVectorClBuiltin(CodegenVector): else: raise TypeError, 'Invalid key type!' - def declare(self, init=None, **kargs): + def declare(self, codegen=None, init=None, **kargs): if isinstance(init,int): init = ','.join([self.typegen.dump(init) for _ in xrange(self.dim)]) init = '({})({})'.format(self.ctype,init) elif init.__class__ in [list,tuple,np.ndarray]: init = ','.join([self.typegen.dump(init[i]) for i in xrange(self.dim)]) init = '({})({})'.format(self.ctype,init) - return super(CodegenVectorClBuiltin,self).declare(init=init, **kargs) + return super(CodegenVectorClBuiltin,self).declare(init=init, codegen=codegen, **kargs) class CodegenVectorClBuiltinFunc(CodegenVectorClBuiltin): def __init__(self,fname,name,btype,dim,typegen, @@ -762,13 +783,12 @@ class CodegenVectorClBuiltinFunc(CodegenVectorClBuiltin): assert i<self.dim return 'get_{}({})'.format(self.fname,i) - def declare(self,codegen=None,align=False,const=None,init=None): + def declare(self, codegen=None, init=None, **kargs): if init is False: init = None else: init = self.fval() if not self.known() else init - return super(CodegenVectorClBuiltinFunc,self).declare(codegen=codegen,align=align, - const=const, init=init) + return super(CodegenVectorClBuiltinFunc,self).declare(init=init, codegen=codegen, **kargs) class CodegenStruct(CodegenVariable): def __init__(self, name, struct, diff --git a/hysop/backend/device/codegen/kernels/directional_remesh.py b/hysop/backend/device/codegen/kernels/directional_remesh.py index 2199743e92c9df8a3f48b77aef871999744de610..adea049a7fe1e018d3b2b739056e6b305aba7e7a 100644 --- a/hysop/backend/device/codegen/kernels/directional_remesh.py +++ b/hysop/backend/device/codegen/kernels/directional_remesh.py @@ -27,14 +27,13 @@ from hysop.backend.device.codegen.base.variables import CodegenStruct from hysop.backend.device.codegen.structs.mesh_info import MeshBaseStruct, MeshInfoStruct from hysop.backend.device.codegen.functions.compute_index import ComputeIndexFunction -from hysop.backend.device.codegen.functions.cache_load import CacheLoadFunction -from hysop.backend.device.codegen.functions.runge_kutta import RungeKuttaFunction from hysop.backend.device.opencl import cl, clCharacterize from hysop.backend.device.opencl.opencl_env import OpenClEnvironment from hysop.backend.device.opencl.opencl_kernel import OpenClKernelLauncher from hysop.backend.device.kernel_autotuner import KernelAutotuner, AutotunerFlags, \ KernelGenerationError + from hysop.fields.discrete import DiscreteField from hysop.core.arrays.all import OpenClArrayBackend from hysop.numerics.remesh.remesh import RemeshKernel @@ -142,6 +141,10 @@ class DirectionalRemeshKernel(KernelCodeGenerator): mesh_info_struct = MeshInfoStruct(typegen=typegen, typedef='MeshInfo_s') reqs['MeshInfoStruct'] = mesh_info_struct + compute_id = ComputeIndexFunction(typegen=typegen, dim=work_dim, itype=itype, + wrap=is_periodic) + reqs['compute_id'] = compute_id + return reqs def gen_kernel_arguments(self, typegen, work_dim, itype, ftype, @@ -209,6 +212,8 @@ class DirectionalRemeshKernel(KernelCodeGenerator): is_periodic = s.is_periodic local_size_known = s.local_size_known + compute_id = s.reqs['compute_id'] + global_id = s.vars['global_id'] local_id = s.vars['local_id'] group_id = s.vars['group_id'] @@ -228,15 +233,36 @@ class DirectionalRemeshKernel(KernelCodeGenerator): position = s.position scalars_in = s.scalars_in scalars_out = s.scalars_out - + + + #common to all scalars and position compute_grid_size = position_mesh_info['local_mesh']['compute_resolution'].view( 'compute_grid_size',slice(None,work_dim),const=True) - + + # may be topology specific + position_grid_size = position_mesh_info['local_mesh']['resolution'].view( + 'pos_grid_size', slice(0,work_dim), const=True) + scalars_grid_size = tuple(smi['local_mesh']['resolution'].view('S{}_grid_size'.format(i), + slice(0,work_dim), const=True) for (i,smi) in enumerate(scalars_mesh_info)) + position_grid_ghosts = position_mesh_info['ghosts'].view( 'pos_grid_ghosts', slice(0,work_dim), const=True) scalars_grid_ghosts = tuple(smi['ghosts'].view('S{}_grid_ghosts'.format(i), slice(0,work_dim), const=True) for (i,smi) in enumerate(scalars_mesh_info)) + position_GID = CodegenVariable('pos_global_id', itype, typegen=tg) + scalars_GID = tuple(CodegenVariable('S{}_global_id'.format(i), itype, typegen=tg) for i in xrange(nscalars)) + + position_global_id = CodegenVectorClBuiltin('pos_gid', itype, work_dim, typegen=tg) + scalars_global_id = tuple(CodegenVectorClBuiltin('S{}_gid'.format(i), + itype, work_dim, typegen=tg) for i in xrange(nscalars)) + + grid_ghosts = (position_grid_ghosts,) + scalars_grid_ghosts + grid_sizes = (position_grid_size,) + scalars_grid_size + global_ids = (position_global_id,) + scalars_global_id + GIDs = (position_GID,) + scalars_GID + + dx = position_mesh_info['dx'].view('dx', slice(0,1), const=True) inv_dx = position_mesh_info['inv_dx'].view('inv_dx', slice(0,1), const=True) xmin = position_mesh_info['local_mesh']['xmin'].view('xmin', slice(0,1), const=True) @@ -254,10 +280,6 @@ class DirectionalRemeshKernel(KernelCodeGenerator): line_velocity = CodegenVariable(name='line_velocity', ctype=ftype, ptr=True, storage='__global', ptr_restrict=True, ptr_const=True, const=True, typegen=tg) - position_global_id = CodegenVectorClBuiltin('pos_gid', itype, work_dim, typegen=tg) - scalars_global_id = tuple(CodegenVectorClBuiltin('S{}_gid'.format(i), - itype, work_dim, typegen=tg) for i in xrange(nscalars)) - npart = CodegenVariable(name='nparticles', ctype=itype, typegen=tg, value=nparticles) cache_ghosts = CodegenVariable('cache_ghosts', itype, typegen=tg, @@ -280,15 +302,18 @@ class DirectionalRemeshKernel(KernelCodeGenerator): for i in xrange(nscalars): Si = CodegenVariable(name='S{}'.format(i),ctype=ftype,typegen=tg, ptr_restrict=True, ptr=True, storage=self._local, - const=True, + ptr_const=True, init='{} + {}*{}'.format(buf,i,cache_width)) cached_scalars.append(Si) - first = CodegenVariable('first','bool',tg,init='true') - active = CodegenVariable('active','bool',tg) + first = CodegenVariable('first','bool',tg,init='true') + active = CodegenVariable('active','bool',tg) particle_offset = CodegenVariable('particle_offset',itype,tg) + pos = CodegenVectorClBuiltin('p', ftype, nparticles, tg) + scalars = [CodegenVectorClBuiltin('s{}'.format(i), ftype, nparticles, tg) for i in xrange(nscalars)] + @contextlib.contextmanager def _work_iterate_(i): try: @@ -315,7 +340,8 @@ class DirectionalRemeshKernel(KernelCodeGenerator): active.declare(s, init='({} < {})'.format(particle_offset, compute_grid_size[0])) elif i==1: first.declare(s) - + s.jumpline() + with s._align_() as al: for field_gid, field_ghosts in zip((position_global_id,)+scalars_global_id, (position_grid_ghosts,)+scalars_grid_ghosts): @@ -325,13 +351,11 @@ class DirectionalRemeshKernel(KernelCodeGenerator): else: al.append('{} $= {} $+ {};'.format(field_gid[i], loopvars[i], field_ghosts[i])) - - + al.jumpline() yield ctx except: raise - with s._kernel_(): s.jumpline() @@ -346,10 +370,10 @@ class DirectionalRemeshKernel(KernelCodeGenerator): s.decl_aligned_vars(npart, cache_ghosts, cache_width, local_work, const=True) - s.decl_aligned_vars(compute_grid_size, position_grid_ghosts, *scalars_grid_ghosts) - - s.decl_vars(position_global_id, *scalars_global_id) + s.decl_vars(compute_grid_size) s.jumpline() + s.decl_aligned_vars(*grid_sizes) + s.decl_aligned_vars(*grid_ghosts) if local_size_known: s.decl_vars(*cached_scalars) @@ -357,13 +381,26 @@ class DirectionalRemeshKernel(KernelCodeGenerator): else: s.decl_aligned_vars(*cached_scalars) - + with s._align_() as al: + s.decl_vars(*global_ids, align=True, codegen=al) + s.decl_vars(*GIDs, align=True, codegen=al) + s.decl_vars(pos, *scalars, align=True, codegen=al) + s.jumpline() nested_loops = [_work_iterate_(i) for i in xrange(dim-1,-1,-1)] with contextlib.nested(*nested_loops): - # s.barrier(_local=True) - # s.jumpline() - pass + with s._align_() as al: + for (_GID, _global_id, _grid_size) in zip(GIDs, global_ids, grid_sizes): + al.append('{} $= {};'.format(_GID, + compute_id(idx=_global_id, size=_grid_size))) + s.jumpline() + + with s._align_() as al: + pos.affect(al, align=True, init=position[position_GID()]) + for si, sin, sgid in zip(scalars, scalars_in, scalars_GID): + si.affect(al, align=True, init=sin[sgid]) + s.jumpline() + if __name__ == '__main__':