From 78938143da029dceeccdc3e5dc25bbbe53dce0dc Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Keck <Jean-Baptiste.Keck@imag.fr> Date: Fri, 23 Dec 2016 16:09:23 +0100 Subject: [PATCH] working advection, periodic non cached --- .../codegen/kernels/directional_advection.py | 81 ++++++++++++------- 1 file changed, 53 insertions(+), 28 deletions(-) diff --git a/hysop/codegen/kernels/directional_advection.py b/hysop/codegen/kernels/directional_advection.py index ada57d960..c2f36e6fd 100644 --- a/hysop/codegen/kernels/directional_advection.py +++ b/hysop/codegen/kernels/directional_advection.py @@ -44,15 +44,16 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): ftype[0],nparticles,min_ghosts) @staticmethod - def min_ghosts(dx, max_velocity): - return math.ceil(max_velocity / dx) + def min_ghosts(max_dt, velocity_dx, max_velocity): + return int(math.ceil(max_dt*umax/float(velocity_dx))) def __init__(self, typegen, work_dim, ftype, is_cached, rk_scheme, boundary, nparticles, min_ghosts=0, symbolic_mode = False, - known_vars = None): + debug_mode = False, + known_vars = None): assert work_dim>0 and work_dim<=3 assert nparticles in [1,2,4,8,16] @@ -79,10 +80,10 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): name = DirectionalAdvectionKernel.codegen_name(ftype,is_cached,rk_scheme,nparticles,min_ghosts) kernel_reqs = self.build_requirements(typegen, work_dim, itype, ftype, is_cached, - rk_scheme, boundary, nparticles, symbolic_mode, storage, is_periodic) + rk_scheme, boundary, nparticles, symbolic_mode, storage, is_periodic, known_vars) kernel_args = self.gen_kernel_arguments(typegen, work_dim, itype, ftype, kernel_reqs, is_cached, - local_size_known) + local_size_known, debug_mode) super(DirectionalAdvectionKernel,self).__init__( name=name, @@ -127,9 +128,11 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): #return global_work_size from effective work_size without # taking into account local_work_size alignment @staticmethod - def get_max_global_size(work_size, work_load): + def get_max_global_size(work_size, work_load, nparticles): work_size = np.asarray(work_size) work_load = np.asarray(work_load) + + assert work_load[0] == 1 global_size = work_size.copy() global_size = ((global_size+work_load-1)/work_load) @@ -137,17 +140,28 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): #return global_work_size from effective work_size and given local_work_size # global_work_size will be a multiple of local_work_size - def get_global_size(self, work_size, local_work_size, work_load=[1,1,1]): + def get_global_size(self, work_size, local_work_size, work_load=None): + work_dim = self.work_dim + work_load = [1]*work_dim if (work_load is None) else work_load + work_size = np.asarray(work_size) work_load = np.asarray(work_load) local_work_size = np.asarray(local_work_size) + + nparticles = self.nparticles - assert (local_work_size[1] == 1) and (local_work_size[2]==1) + for var in [work_size, work_load, local_work_size]: + msg = 'Dimension mismatch for variable {}, expected work_dim is {}.' + msg = msg.format(var, work_dim) + if var.size != work_dim: + raise ValueError(msg) + for i in xrange(1,work_dim): + assert (local_work_size[i] == 1), 'local_work_size error!' if 'local_size' in self.known_vars: - assert (self.known_vars['local_size'] == local_work_size).all() + assert (self.known_vars['local_size'] == local_work_size).all(), 'local_work_size mismatch!' - max_global_size = self.get_max_global_size(work_size, work_load) + max_global_size = self.get_max_global_size(work_size, work_load, nparticles) global_size = ((max_global_size+local_work_size-1)/local_work_size) * local_work_size return global_size @@ -178,20 +192,15 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): return (sc,dc,tc) def build_requirements(self,typegen,work_dim,itype,ftype,is_cached,rk_scheme, - boundary,nparticles,force_symbolic,storage,is_periodic): + boundary,nparticles,force_symbolic,storage,is_periodic,known_vars): tg=typegen reqs = WriteOnceDict() - # compute_id = ComputeIndexFunction(typegen=typegen, dim=work_dim, itype='int', - # wrap=is_periodic) - # reqs['compute_id'] = compute_id - mesh_base_struct = MeshBaseStruct(typegen=typegen, typedef='MeshBase_s') reqs['MeshBaseStruct'] = mesh_base_struct mesh_info_struct = MeshInfoStruct(typegen=typegen, typedef='MeshInfo_s') reqs['MeshInfoStruct'] = mesh_info_struct - advection_rhs = DirectionalAdvectionRhsFunction(typegen=typegen, work_dim=work_dim, ftype=ftype, is_cached=is_cached, @@ -212,7 +221,7 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): return reqs def gen_kernel_arguments(self, typegen, work_dim, itype, ftype, requirements,is_cached, - local_size_known): + local_size_known, debug_mode): kargs = ArgDict() kargs['dt'] = CodegenVariable(ctype=ftype,name='dt',typegen=typegen, @@ -225,12 +234,18 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): restrict=True,ptr=True,const=True,add_impl_const=True) kargs['position'] = CodegenVariable(storage=_global,name='position',ctype=ftype,typegen=typegen, restrict=True,ptr=True,const=False,add_impl_const=True) + + if debug_mode: + kargs['dbg0'] = CodegenVariable(storage=_global,name='dbg0',ctype=itype,typegen=typegen, + restrict=True,ptr=True,const=False,add_impl_const=True) + kargs['dbg1'] = CodegenVariable(storage=_global,name='dbg1',ctype=itype,typegen=typegen, + restrict=True,ptr=True,const=False,add_impl_const=True) kargs['mesh_info'] = requirements['MeshInfoStruct'].build_codegen_variable(const=True,name='mesh_info') if is_cached and not local_size_known: - kargs['buffer'] = CodegenVariable(storage=_local,ctype=ftype, add_impl_const=True, - name='buffer', ptr=True, restrict=True, typegen=typegen, nl=False) + kargs['Vc'] = CodegenVariable(storage=_local,ctype=ftype, add_impl_const=True, + name='Vc', ptr=True, restrict=True, typegen=typegen, nl=False) return kargs @@ -268,8 +283,6 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): velocity = s.vars['velocity'] position = s.vars['position'] - if is_cached: - shared = s.vars['buffer'] grid_size = mesh_info['local_mesh']['resolution'].view( 'grid_size', slice(None,work_dim)) @@ -291,6 +304,16 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): pid = CodegenVectorClBuiltin('pid', itype, nparticles, typegen=tg) poffset = CodegenVectorClBuiltin('poffset', itype, nparticles, typegen=tg) + if is_cached: + cache_ghosts = CodegenVariable('cache_ghosts', itype, typegen=tg, value=min_ghosts) + cache_width = CodegenVariable('cache_width', itype, typegen=tg) + if local_size_known: + local_size = s.known_vars['local_size'] + Vc = CodegenArray(name='Vc',dim=1,ctype=ftype,typegen=tg, + shape=(local_size.value[0]+2*min_ghosts,), storage='__local') + else: + Vc = s.vars['Vc'] + part_ftype = X.ctype part_itype = pid.ctype @@ -377,13 +400,15 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): s.append(code) s.append('{} += {};'.format(X(), xmin())) - - if nparticles>1: - code = 'vstore{}({}, {}, {}+{});'.format(nparticles,X(), - global_id[0], position(), line_index()) - else: - code = '{} = {};'.format(position['{}+{}'.format(line_index(),pid())],X()) - s.append(code) + + with s._if_(active()): + if nparticles>1: + code = 'vstore{}({}, {}, {}+{});'.format(nparticles,X(), + global_id[0], position(), line_index()) + s.append(code) + else: + code = '{} = {};'.format(position['{}+{}'.format(line_index(),pid())],X()) + s.append(code) -- GitLab