diff --git a/examples/taylor_green/taylor_green_monoscale.py b/examples/taylor_green/taylor_green_monoscale.py index c6812c83251ca3c88b418b43a921a110e3437e02..3f66aa7b61a4d380c3b9fcf59ed27b99ab99c974 100644 --- a/examples/taylor_green/taylor_green_monoscale.py +++ b/examples/taylor_green/taylor_green_monoscale.py @@ -62,7 +62,7 @@ def do_solve(npts, viscosity=1./1600., lcfl=0.125, cfl=0.50): name='advec', velocity = velo, advected_fields = (vorti,), - cfl = cfl, + velocity_cfl = cfl, variables = {velo: topo_ghosts, vorti: topo_ghosts}, ) stretch = DirectionalStretching( diff --git a/hysop/backend/device/codegen/base/variables.py b/hysop/backend/device/codegen/base/variables.py index b932cc215ce58e112ef4ee6d670f8ca44284bc5d..943a08fab9253ea63499512c5dd2c13d46a567af 100644 --- a/hysop/backend/device/codegen/base/variables.py +++ b/hysop/backend/device/codegen/base/variables.py @@ -66,7 +66,7 @@ class CodegenVariable(object): add_impl_const=False, nl=None, ptr=False, ptr_restrict=None, ptr_volatile=None, ptr_const=None, value=None, svalue=None, init=None, - symbolic_mode=False,struct_var=None): + symbolic_mode=False, struct_var=None): check_instance(typegen, TypeGen) @@ -197,9 +197,9 @@ class CodegenVariable(object): def pointer_alias(self, name, ctype, **kargs): handle = self.newvar(name=name, ctype=ctype, - init='({})({})'.format( - self.full_ctype(cast=True,ctype=ctype), self), - **kargs) + init='({})({})'.format( + self.full_ctype(cast=True,ctype=ctype), self), + **kargs) return handle def pointer(self, name, ptr_level, @@ -819,7 +819,7 @@ class CodegenStruct(CodegenVariable): struct_var=struct_var) self.genvars(struct,var_overrides) - + def force_symbolic(self, force=True): if force is None: return diff --git a/hysop/backend/device/codegen/kernels/directional_remesh.py b/hysop/backend/device/codegen/kernels/directional_remesh.py index 58ef57488f19a8b508183e0f8f4ef2121cd431d0..562e1ada679c8d4d0b17ee7339bda70762e9eb27 100644 --- 
a/hysop/backend/device/codegen/kernels/directional_remesh.py +++ b/hysop/backend/device/codegen/kernels/directional_remesh.py @@ -11,7 +11,7 @@ from hysop.tools.misc import Utils from hysop.tools.types import check_instance from hysop.deps import np -from hysop.constants import DirectionLabels, BoundaryCondition, Backend, Precision +from hysop.constants import DirectionLabels, BoundaryCondition, Backend from hysop.core.arrays.all import OpenClArray from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator @@ -32,6 +32,7 @@ from hysop.backend.device.opencl.opencl_kernel import OpenClKernelLauncher from hysop.backend.device.kernel_autotuner import KernelAutotuner, AutotunerFlags, \ KernelGenerationError +from hysop.fields.continuous import Field from hysop.fields.discrete import DiscreteField from hysop.core.arrays.all import OpenClArrayBackend from hysop.numerics.remesh.remesh import RemeshKernel @@ -64,7 +65,7 @@ class DirectionalRemeshKernel(KernelCodeGenerator): return min_ghosts @classmethod - def min_wg_size(work_dim, scalar_cfl, remesh_kernel): + def min_wg_size(cls, work_dim, scalar_cfl, remesh_kernel): size = [1]*work_dim min_ghosts = cls.scalars_out_cache_ghosts(scalar_cfl, remesh_kernel) size[0] = 2*min_ghosts+1 @@ -143,6 +144,7 @@ class DirectionalRemeshKernel(KernelCodeGenerator): def __init__(self, typegen, work_dim, ftype, nparticles, nscalars, sboundary, is_inplace, scalar_cfl, remesh_kernel, + group_scalars=None, remesh_criteria_eps=None, use_atomics = False, symbolic_mode = False, @@ -154,6 +156,11 @@ class DirectionalRemeshKernel(KernelCodeGenerator): assert nparticles in [1,2,4,8,16] check_instance(sboundary,tuple,values=BoundaryCondition) check_instance(remesh_kernel, RemeshKernel) + + group_scalars = group_scalars or tuple(1 for _ in xrange(nscalars)) + check_instance(group_scalars, tuple, values=int) + assert sum(group_scalars) == nscalars + nfields = len(group_scalars) assert sboundary[0] in [BoundaryCondition.PERIODIC, 
BoundaryCondition.NONE] assert sboundary[1] in [BoundaryCondition.PERIODIC, BoundaryCondition.NONE] @@ -169,17 +176,17 @@ class DirectionalRemeshKernel(KernelCodeGenerator): name = DirectionalRemeshKernel.codegen_name(work_dim, remesh_kernel, ftype, - nparticles,nscalars, remesh_criteria_eps, + nparticles, nscalars, remesh_criteria_eps, use_atomics, is_inplace) kernel_reqs = self.build_requirements(typegen, work_dim, itype, ftype, - sboundary, nparticles, nscalars, + sboundary, nparticles, nscalars, nfields, group_scalars, symbolic_mode, is_periodic, remesh_criteria_eps, use_atomics, remesh_kernel, known_vars, debug_mode) kernel_args = self.gen_kernel_arguments(typegen, work_dim, itype, ftype, - nparticles, nscalars, local_size_known, is_inplace, + nparticles, nscalars, nfields, group_scalars, local_size_known, is_inplace, debug_mode, kernel_reqs) super(DirectionalRemeshKernel,self).__init__( @@ -202,6 +209,8 @@ class DirectionalRemeshKernel(KernelCodeGenerator): self.sboundary = sboundary self.nparticles = nparticles self.nscalars = nscalars + self.nfields = nfields + self.group_scalars = group_scalars self.local_size_known = local_size_known self.is_periodic = is_periodic self.is_inplace = is_inplace @@ -212,7 +221,7 @@ class DirectionalRemeshKernel(KernelCodeGenerator): self.gencode() def build_requirements(self, typegen, work_dim, itype, ftype, - sboundary, nparticles, nscalars, symbolic_mode, is_periodic, + sboundary, nparticles, nscalars, nfields, group_scalars, symbolic_mode, is_periodic, remesh_criteria_eps, use_atomics, remesh_kernel, known_vars, debug_mode): reqs = WriteOnceDict() @@ -233,29 +242,47 @@ class DirectionalRemeshKernel(KernelCodeGenerator): return reqs def gen_kernel_arguments(self, typegen, work_dim, itype, ftype, - nparticles, nscalars, local_size_known, is_inplace, + nparticles, nscalars, nfields, group_scalars, local_size_known, is_inplace, debug_mode, kernel_reqs): kargs = ArgDict() self.position = 
OpenClArrayBackend.build_codegen_argument(kargs, name='position', storage=self._global, ctype=ftype, typegen=typegen, ptr_restrict=True, const=True) - if is_inplace: - self.scalars_in = tuple( - OpenClArrayBackend.build_codegen_argument(kargs, name='S{}_inout'.format(i), - storage=self._global, ctype=ftype, typegen=typegen, - const=False, ptr_restrict=True) for i in xrange(nscalars)) - self.scalars_out = self.scalars_in - else: - self.scalars_in = tuple( - OpenClArrayBackend.build_codegen_argument(kargs, name='S{}_in'.format(i), + + scalars_in = [] + for i in xrange(nfields): + args_in = [] + for j in xrange(group_scalars[i]): + if is_inplace: + arg = OpenClArrayBackend.build_codegen_argument(kargs, name='S{}_{}_inout'.format(i,j), storage=self._global, ctype=ftype, typegen=typegen, - const=True, ptr_restrict=True) for i in xrange(nscalars)) - self.scalars_out = tuple( - OpenClArrayBackend.build_codegen_argument(kargs, name='S{}_out'.format(i), + const=False, ptr_restrict=True) + else: + arg = OpenClArrayBackend.build_codegen_argument(kargs, name='S{}_{}_in'.format(i,j), storage=self._global, ctype=ftype, typegen=typegen, - const=False, ptr_restrict=True) for i in xrange(nscalars)) + const=True, ptr_restrict=True) + args_in.append(arg) + scalars_in.append(tuple(args_in)) + scalars_in = tuple(scalars_in) + if is_inplace: + scalars_out = scalars_in + else: + scalars_out = [] + for i in xrange(nfields): + args_out = [] + for j in xrange(group_scalars[i]): + arg_out = OpenClArrayBackend.build_codegen_argument(kargs, name='S{}_{}_out'.format(i,j), + storage=self._global, ctype=ftype, typegen=typegen, + const=False, ptr_restrict=True) + args_out.append(arg_out) + scalars_out.append(tuple(args_out)) + scalars_out = tuple(scalars_out) + + self.scalars_in = scalars_in + self.scalars_out = scalars_out + if debug_mode: kargs['dbg0'] = CodegenVariable(storage=self._global,name='dbg0',ctype=itype, typegen=typegen, ptr_restrict=True,ptr=True,const=False,add_impl_const=True) @@ 
-265,14 +292,20 @@ class DirectionalRemeshKernel(KernelCodeGenerator): kargs['position_mesh_info'] = kernel_reqs['MeshInfoStruct'].build_codegen_variable( const=True, name='position_mesh_info') - for i in xrange(nscalars): - kargs['S{}_out_mesh_info'.format(i)] = \ - kernel_reqs['MeshInfoStruct'].build_codegen_variable( - const=True, name='S{}_out_mesh_info'.format(i), nl=True) - if not is_inplace: + if is_inplace: + for i in xrange(nfields): + kargs['S{}_inout_mesh_info'.format(i)] = \ + kernel_reqs['MeshInfoStruct'].build_codegen_variable( + const=True, name='S{}_inout_mesh_info'.format(i), nl=True) + else: + for i in xrange(nfields): kargs['S{}_in_mesh_info'.format(i)] = \ kernel_reqs['MeshInfoStruct'].build_codegen_variable( const=True, name='S{}_in_mesh_info'.format(i), nl=True) + for i in xrange(nfields): + kargs['S{}_out_mesh_info'.format(i)] = \ + kernel_reqs['MeshInfoStruct'].build_codegen_variable( + const=True, name='S{}_out_mesh_info'.format(i), nl=True) if not local_size_known: kargs['buffer'] = CodegenVariable(storage=self._local, ctype=ftype, @@ -293,6 +326,7 @@ class DirectionalRemeshKernel(KernelCodeGenerator): sboundary = s.sboundary nparticles = s.nparticles nscalars = s.nscalars + nfields = s.nfields min_ghosts = s.min_ghosts is_inplace = s.is_inplace use_atomics = s.use_atomics @@ -300,6 +334,7 @@ class DirectionalRemeshKernel(KernelCodeGenerator): debug_mode = s.debug_mode symbolic_mode = s.symbolic_mode + group_scalars = s.group_scalars is_periodic = s.is_periodic local_size_known = s.local_size_known @@ -319,11 +354,13 @@ class DirectionalRemeshKernel(KernelCodeGenerator): dbg1 = s.vars['dbg1'] position_mesh_info = s.vars['position_mesh_info'] - scalars_mesh_info_out = [s.vars['S{}_out_mesh_info'.format(i)] for i in xrange(nscalars)] - if not is_inplace: - scalars_mesh_info_in = [s.vars['S{}_in_mesh_info'.format(i)] for i in xrange(nscalars)] + + if is_inplace: + scalars_mesh_info_in = [s.vars['S{}_inout_mesh_info'.format(i)] for i in 
xrange(nfields)] + scalars_mesh_info_out = scalars_mesh_info_in else: - scalars_mesh_info_in = scalars_mesh_info_out + scalars_mesh_info_in = [s.vars['S{}_in_mesh_info'.format(i)] for i in xrange(nfields)] + scalars_mesh_info_out = [s.vars['S{}_out_mesh_info'.format(i)] for i in xrange(nfields)] position_base = s.vars['position_base'] position_offset = s.vars['position_offset'] @@ -354,7 +391,7 @@ class DirectionalRemeshKernel(KernelCodeGenerator): for (i,smi) in enumerate(scalars_mesh_info_out)) scalars_in_global_id = tuple(CodegenVectorClBuiltin('S{}_inout_gid'.format(i), - itype, work_dim, typegen=tg) for i in xrange(nscalars)) + itype, work_dim, typegen=tg) for i in xrange(nfields)) scalars_out_grid_size = scalars_in_grid_size scalars_out_grid_ghosts = scalars_in_grid_ghosts @@ -377,9 +414,9 @@ class DirectionalRemeshKernel(KernelCodeGenerator): slice(0,work_dim), const=True) for (i,smi) in enumerate(scalars_mesh_info_out)) scalars_in_global_id = tuple(CodegenVectorClBuiltin('S{}_in_gid'.format(i), - itype, work_dim, typegen=tg) for i in xrange(nscalars)) + itype, work_dim, typegen=tg) for i in xrange(nfields)) scalars_out_global_id = tuple(CodegenVectorClBuiltin('S{}_out_gid'.format(i), - itype, work_dim, typegen=tg) for i in xrange(nscalars)) + itype, work_dim, typegen=tg) for i in xrange(nfields)) grid_ghosts = (position_grid_ghosts,) + scalars_in_grid_ghosts + \ scalars_out_grid_ghosts @@ -389,9 +426,9 @@ class DirectionalRemeshKernel(KernelCodeGenerator): s.update_vars(position=position, inv_dx=inv_dx, position_grid_ghosts=position_grid_ghosts, compute_grid_size=compute_grid_size) - s.update_vars(**dict(zip([sin.name for sin in scalars_in], scalars_in))) + s.update_vars(**dict((sij.name,sij) for si in scalars_in for sij in si)) if not is_inplace: - s.update_vars(**dict(zip([sout.name for sout in scalars_out], scalars_out))) + s.update_vars(**dict((sij.name,sij) for si in scalars_out for sij in si)) npart = CodegenVariable(name='nparticles', ctype=itype, 
typegen=tg, value=nparticles) @@ -422,60 +459,92 @@ class DirectionalRemeshKernel(KernelCodeGenerator): line_position = position.newvar('line_position', init=_line_init(position, position_global_id, position_grid_size)) - line_scalars_in = tuple(sin.newvar('line_{}'.format(sin.name), - init=_line_init(sin, sin_global_id, sin_grid_size)) - for (sin, sin_global_id, sin_grid_size) - in zip(scalars_in, scalars_in_global_id, scalars_in_grid_size)) - line_scalars_out = tuple(sout.newvar('line_{}'.format(sout.name), - init=_line_init(sout, sout_global_id, sout_grid_size)) - for (sout, sout_global_id, sout_grid_size) - in zip(scalars_out, scalars_out_global_id, scalars_out_grid_size)) - line_vars = (line_position,) + line_scalars_in + + line_scalars_in = [] + line_scalars_out = [] + for (Si, Si_global_id, Si_grid_size) in \ + zip(scalars_in, scalars_in_global_id, scalars_in_grid_size): + Li = [] + for Sij in Si: + lij = Sij.newvar('line_{}'.format(Sij.name), + init=_line_init(Sij, Si_global_id, Si_grid_size)) + Li.append(lij) + line_scalars_in.append(tuple(Li)) + for (Si, Si_global_id, Si_grid_size) in \ + zip(scalars_out, scalars_out_global_id, scalars_out_grid_size): + Li = [] + for Sij in Si: + lij = Sij.newvar('line_{}'.format(Sij.name), + init=_line_init(Sij, Si_global_id, Si_grid_size)) + Li.append(lij) + line_scalars_out.append(tuple(Li)) + line_scalars_in = tuple(line_scalars_in) + line_scalars_out = tuple(line_scalars_out) + + line_vars = ((line_position,),) + line_scalars_in if not is_inplace: line_vars += line_scalars_out - - cached_scalars = [] boundary_scalars = [] if local_size_known: L = s.known_vars['local_size'] - for i in xrange(nscalars): - Si = CodegenArray(name='S{}'.format(i), - dim=1, ctype=ftype, typegen=tg, - shape=(nparticles*L[0]+2*min_ghosts,), storage=self._local) - cached_scalars.append(Si) - if is_periodic: - for i in xrange(nscalars): - BSi = CodegenArray(name='boundary_S{}'.format(i), + for i in xrange(nfields): + Si = [] + BSi = [] + for j 
in xrange(group_scalars[i]): + Sij = CodegenArray(name='S{}_{}'.format(i,j), dim=1, ctype=ftype, typegen=tg, - shape=(2*min_ghosts,), storage=self._local) - boundary_scalars.append(BSi) + shape=(nparticles*L[0]+2*min_ghosts,), storage=self._local) + Si.append(Sij) + + if is_periodic: + BSij = CodegenArray(name='boundary_S{}_{}'.format(i,j), + dim=1, ctype=ftype, typegen=tg, + shape=(2*min_ghosts,), storage=self._local) + BSi.append(BSij) + cached_scalars.append(tuple(Si)) + if is_periodic: + boundary_scalars.append(tuple(BSi)) else: buf = self.vars['buffer'] - for i in xrange(nscalars): - Si = CodegenVariable(name='S{}'.format(i),ctype=ftype,typegen=tg, - ptr_restrict=True, ptr=True, storage=self._local, - ptr_const=True, - init='{} + {}*{}'.format(buf,i,cache_width)) - cached_scalars.append(Si) - if is_periodic: - for i in xrange(nscalars): - BSi = CodegenVariable(name='boundary_S{}'.format(i), ctype=ftype, typegen=tg, + k=0 + for i in xrange(nfields): + Si = [] + BSi = [] + for j in xrange(group_scalars[i]): + Sij = CodegenVariable(name='S{}_{}'.format(i,j),ctype=ftype,typegen=tg, ptr_restrict=True, ptr=True, storage=self._local, ptr_const=True, - init='{} + {}*{} + 2*{}*{}'.format(buf, nscalars, cache_width, i, min_ghosts)) - boundary_scalars.append(BSi) - + init='{} + {}*{}'.format(buf,k,cache_width)) + Si.append(Sij) + + if is_periodic: + BSij = CodegenVariable(name='boundary_S{}_{}'.format(i,j), ctype=ftype, typegen=tg, + ptr_restrict=True, ptr=True, storage=self._local, + ptr_const=True, + init='{} + {}*{} + 2*{}*{}'.format(buf, nscalars, cache_width, k, min_ghosts)) + BSi.append(BSij) + k+=1 + cached_scalars.append(tuple(Si)) + if is_periodic: + boundary_scalars.append(tuple(BSi)) + cached_scalars = tuple(cached_scalars) + boundary_scalars = tuple(boundary_scalars) pos = CodegenVectorClBuiltin('p', ftype, nparticles, tg) - scalars = [CodegenVectorClBuiltin('s{}'.format(i), ftype, nparticles, tg) - for i in xrange(nscalars)] + scalars = [] + for i in
xrange(nfields): + si = [] + for j in xrange(group_scalars[i]): + sij = CodegenVectorClBuiltin('s{}_{}'.format(i,j), ftype, nparticles, tg) + si.append(sij) + scalars.append(tuple(si)) + scalars = tuple(scalars) + vzero = CodegenVectorClBuiltin('vzero', ftype, nparticles, tg, value=np.zeros(shape=(nparticles,), dtype=np.float64)) - #voffset = CodegenVectorClBuiltin('voffset', itype, nparticles, tg, - #value=np.arange(nparticles, dtype=np.int32)) kmax = CodegenVariable('kmax', itype, tg, const=True, init='(({}+{lwork}-1)/{lwork})'.format( @@ -569,10 +638,10 @@ class DirectionalRemeshKernel(KernelCodeGenerator): with s._kernel_(): s.jumpline() - if is_inplace: - s.decl_aligned_vars(*((position,)+scalars_in)) - else: - s.decl_aligned_vars(*((position,)+scalars_in+scalars_out)) + vars_ = ((position,),)+scalars_in + if not is_inplace: + vars_ += scalars_out + s.decl_aligned_vars(*tuple(vij for vi in vars_ for vij in vi)) s.decl_aligned_vars(local_id, global_size, local_size, const=True) @@ -588,20 +657,17 @@ class DirectionalRemeshKernel(KernelCodeGenerator): s.decl_aligned_vars(*grid_sizes) s.decl_aligned_vars(*grid_ghosts) - if local_size_known: - s.decl_vars(*cached_scalars) - if is_periodic: - s.decl_vars(*boundary_scalars) - s.jumpline() - else: - s.decl_aligned_vars(*cached_scalars) - if is_periodic: - s.decl_aligned_vars(*boundary_scalars) + for vargroup in cached_scalars: + s.decl_vars(*vargroup) + if is_periodic: + for vargroup in boundary_scalars: + s.decl_vars(*vargroup) + s.jumpline() s.decl_vars(*global_ids) with s._align_() as al: - s.decl_vars(pos, *scalars, align=True, codegen=al) + s.decl_vars(pos, *tuple(sij for si in scalars for sij in si), align=True, codegen=al) s.jumpline() @@ -624,57 +690,61 @@ class DirectionalRemeshKernel(KernelCodeGenerator): init='{} + {} - {}'.format(particle_offset, _ghosts[0], cache_ghosts if not is_periodic else 0)) s.jumpline() - s.decl_aligned_vars(*line_vars) - + s.decl_aligned_vars(*(lvij for lvi in line_vars for lvij 
in lvi)) s.comment('Get back left cache from right cache') with s._if_('({} < 2*{})'.format(local_id[0], cache_ghosts)): with s._if_('{}'.format(first)): with s._align_() as al: - for cs in cached_scalars: - cs.affect(al, align=True, - i=local_id[0], - init=tg.dump(+0.0)) + for csi in cached_scalars: + for csij in csi: + csij.affect(al, align=True, + i=local_id[0], + init=tg.dump(+0.0)) with s._else_(): end_id='{}+{}'.format(local_work, local_id[0]) if debug_mode: - s.append('printf("%i loaded back %f from position %i.\\n", {}, {}, {});'.format( - local_id[0], cached_scalars[0][end_id], end_id)) + s.append('printf("%i loaded back %f from position %i.\\n", {}, {}, {});'.format(local_id[0], cached_scalars[0][0][end_id], end_id)) with s._align_() as al: - for cs in cached_scalars: - cs.affect(al, align=True, - i='{}'.format(local_id[0]), - init=cs[end_id]) + for csi in cached_scalars: + for csij in csi: + csij.affect(al, align=True, + i='{}'.format(local_id[0]), + init=csij[end_id]) s.jumpline() s.comment('Fill right cache with zeros, excluding left ghosts') - for al, active_cond, j in if_last_thread_active(): - for cs in cached_scalars: - offset = '{}+2*{}+${}'.format(local_offset, cache_ghosts, j) - store = s.vstore(1, cs, offset, vzero[j], align=True, - suppress_semicolon=True) - code = '{} $&& ({});'.format(active_cond, store) - al.append(code) + for al, active_cond, k in if_last_thread_active(): + for csi in cached_scalars: + for csij in csi: + offset = '{}+2*{}+${}'.format(local_offset, cache_ghosts, k) + store = s.vstore(1, csij, offset, vzero[k], align=True, + suppress_semicolon=True) + code = '{} $&& ({});'.format(active_cond, store) + al.append(code) with if_thread_active(): - for cs in cached_scalars: - offset = '{}+2*{}'.format(local_offset, cache_ghosts) - s.append(s.vstore(nparticles, cs, offset, vzero)) + for csi in cached_scalars: + for csij in csi: + offset = '{}+2*{}'.format(local_offset, cache_ghosts) + s.append(s.vstore(nparticles, csij, offset, 
vzero)) s.jumpline() s.comment('Load position and scalars at current index, {} particles at a time.'.format(nparticles)) - for al, active_cond, j in if_last_thread_active(): - load = self.vload(1, line_position, '{}+{}'.format(particle_offset,j), align=True) - code = '{} $= ({} $? {} $: {});'.format(pos[j], active_cond, load, tg.dump(0.0)) + for al, active_cond, k in if_last_thread_active(): + load = self.vload(1, line_position, '{}+{}'.format(particle_offset,k), align=True) + code = '{} $= ({} $? {} $: {});'.format(pos[k], active_cond, load, tg.dump(0.0)) al.append(code) if is_inplace and not is_periodic: - _id = '{}+{}+{}'.format(particle_offset, cache_ghosts, j) + _id = '{}+{}+{}'.format(particle_offset, cache_ghosts, k) else: - _id = '{}+{}'.format(particle_offset, j) - for si, line_sin in zip(scalars, line_scalars_in): - load = self.vload(1, line_sin, _id, align=True) - code = '{} $= ({} $? {} $: {});'.format(si[j], active_cond, load, tg.dump(0.0)) - al.append(code) + _id = '{}+{}'.format(particle_offset, k) + for si, line_si in zip(scalars, line_scalars_in): + for sij, line_sij in zip(si, line_si): + load = self.vload(1, line_sij, _id, align=True) + code = '{} $= ({} $? 
{} $: {});'.format(sij[k], active_cond, load, tg.dump(0.0)) + al.append(code) + with if_thread_active(): with s._align_() as al: pos.affect(al, align=True, @@ -684,9 +754,10 @@ class DirectionalRemeshKernel(KernelCodeGenerator): _id = '{}+{}'.format(particle_offset, cache_ghosts) else: _id = particle_offset - for si, line_sin in zip(scalars, line_scalars_in): - si.affect(al, align=True, - init=self.vload(nparticles, line_sin, _id,align=True)) + for si, line_si in zip(scalars, line_scalars_in): + for sij, line_sij in zip(si, line_si): + sij.affect(al, align=True, + init=self.vload(nparticles, line_sij, _id, align=True)) s.barrier(_local=True) s.jumpline() @@ -700,45 +771,57 @@ class DirectionalRemeshKernel(KernelCodeGenerator): 'active': active, 'line_offset': line_offset } - for i,cached_scalar in enumerate(cached_scalars): - remesh_kargs['S{}'.format(i)] = cached_scalar + + k=0 + for ci in cached_scalars: + for cij in ci: + remesh_kargs['S{}'.format(k)] = cij + k+=1 if is_periodic: remesh_kargs['grid_size'] = compute_grid_size[0] if use_atomics: # with atomic summation in cache, we can remesh everything at a time remesh_kargs['p'] = pos - for i,scalar in enumerate(scalars): - remesh_kargs['s{}'.format(i)] = scalar + k=0 + for si in scalars: + for sij in si: + remesh_kargs['s{}'.format(k)] = sij + k+=1 call = remesh(**remesh_kargs) s.append('{};'.format(call)) else: # without atomics we can only remesh on particle at a time - for j in xrange(nparticles): - remesh_kargs['p'] = pos[j] - for i,scalar in enumerate(scalars): - remesh_kargs['s{}'.format(i)] = scalar[j] + for p in xrange(nparticles): + remesh_kargs['p'] = pos[p] + k=0 + for si in scalars: + for sij in si: + remesh_kargs['s{}'.format(k)] = sij[p] + k+=1 call = remesh(**remesh_kargs) s.append('{};'.format(call)) + s.jumpline() s.comment('Store back remeshed scalars to global memory') - for al, active_cond, j in if_first_or_last_thread_active(): - for sc, line_sout in zip(cached_scalars, line_scalars_out): - 
load = self.vload(1, sc, '{}+{}'.format(local_offset, j)) - if is_periodic: - _id = '({}+{}+{}-{})%{}'.format(particle_offset, compute_grid_size[0], j, cache_ghosts, compute_grid_size[0]) - else: - _id = '{}+{}'.format(particle_offset, j) - store = self.vstore(1, line_sout, _id, load, align=True, jmp=True, suppress_semicolon=True) - code = '{} $&& ({})'.format(active_cond, store) - if debug_mode: - code += ' && (printf("last_active %i wrote %2.2f from cache position %i to global id %i.\\n", {},{},{},{}));'.format( - local_id[0], load, '{}+{}'.format(local_offset,j), _id) - else: - code += ';' - al.append(code) + for al, active_cond, k in if_first_or_last_thread_active(): + for si, li in zip(cached_scalars, line_scalars_out): + for sij, lij in zip(si, li): + load = self.vload(1, sij, '{}+{}'.format(local_offset, k)) + if is_periodic: + _id = '({}+{}+{}-{})%{}'.format(particle_offset, compute_grid_size[0], k, cache_ghosts, compute_grid_size[0]) + else: + _id = '{}+{}'.format(particle_offset, k) + store = self.vstore(1, lij, _id, load, align=True, jmp=True, suppress_semicolon=True) + code = '{} $&& ({})'.format(active_cond, store) + if debug_mode: + code += ' && (printf("last_active %i wrote %2.2f from cache position %i to global id %i.\\n", {},{},{},{}));'.format( + local_id[0], load, '{}+{}'.format(local_offset,k), _id) + else: + code += ';' + al.append(code) with if_thread_active(not_first=True): with s._align_() as al: if debug_mode: @@ -747,23 +830,25 @@ class DirectionalRemeshKernel(KernelCodeGenerator): self.vload(nparticles, cached_scalars[0], local_offset), local_offset, particle_offset if not is_periodic else '({}+{}-{})%{}'.format(particle_offset, compute_grid_size[0], cache_ghosts, compute_grid_size[0]), vnf='[%2.2v{}f]'.format(nparticles) if nparticles>1 else '%2.2f')) - for sc, line_sout in zip(cached_scalars, line_scalars_out): - load = self.vload(nparticles, sc, '{}'.format(local_offset)) - if is_periodic: - _id = 
'({}+{}-{})%{}'.format(particle_offset, compute_grid_size[0], cache_ghosts, compute_grid_size[0]) - else: - _id = particle_offset - store = self.vstore(nparticles, line_sout, _id, load, align=True, jmp=True) - al.append(store) + for ci, li in zip(cached_scalars, line_scalars_out): + for cij, lij in zip(ci, li): + load = self.vload(nparticles, cij, '{}'.format(local_offset)) + if is_periodic: + _id = '({}+{}-{})%{}'.format(particle_offset, compute_grid_size[0], cache_ghosts, compute_grid_size[0]) + else: + _id = particle_offset + store = self.vstore(nparticles, lij, _id, load, align=True, jmp=True) + al.append(store) if is_periodic: s.jumpline() _id = '({}+{}-{}+{})%{}'.format(particle_offset, compute_grid_size[0], cache_ghosts, '{}', compute_grid_size[0]) with s._if_('{} && ({} < 2*{})'.format(first, local_id[0], cache_ghosts)): with s._align_() as al: - for sc, bsc in zip(cached_scalars, boundary_scalars): - load = sc[local_id[0]] - store = bsc.affect(al, align=True, i=local_id[0], init=load) + for ci, bci in zip(cached_scalars, boundary_scalars): + for cij, bcij in zip(ci, bci): + load = cij[local_id[0]] + store = bcij.affect(al, align=True, i=local_id[0], init=load) s.jumpline() with s._if_('{} && ({} < 2*{})'.format(last, local_id[0], cache_ghosts)): @@ -771,11 +856,12 @@ class DirectionalRemeshKernel(KernelCodeGenerator): offset = '{}+{}-{}+{}'.format(line_offset, last_particle, cache_ghosts, local_id[0]) _gid = '({}+{})%{}'.format(offset, compute_grid_size[0], compute_grid_size[0]) with s._align_() as al: - for sc, bsc, line_sout in zip(cached_scalars, boundary_scalars, line_scalars_out): - val0 = bsc[local_id[0]] - val1 = sc['{}+{}'.format(last_particle, local_id[0])] - code = '{} $= {}+{};'.format(line_sout[_gid], val0, val1) - al.append(code) + for ci, bci, li in zip(cached_scalars, boundary_scalars, line_scalars_out): + for cij, bcij, lij in zip(ci,bci,li): + val0 = bcij[local_id[0]] + val1 = cij['{}+{}'.format(last_particle, local_id[0])] + code = '{} $= 
{}+{};'.format(lij[_gid], val0, val1) + al.append(code) if debug_mode: s.append('printf("%i initiated last write of value %2.2f + %2.2f = %2.2f from cache position %i+%i=%i to global id %i+%i-%i+%i=%i.\\n", {}, {},{},{}, {},{},{}, {},{},{},{},{});'.format( local_id[0], @@ -786,9 +872,10 @@ class DirectionalRemeshKernel(KernelCodeGenerator): else: with s._align_() as al: _gid = '{}+{}+{}'.format(line_offset, last_particle, local_id[0]) - for sc, line_sout in zip(cached_scalars, line_scalars_out): - init=sc['{}+{}'.format(last_particle, local_id[0])] - line_sout.affect(i=_gid, init=init, align=True, codegen=al) + for ci, li in zip(cached_scalars, line_scalars_out): + for cij, lij in zip(ci,li): + init=cij['{}+{}'.format(last_particle, local_id[0])] + lij.affect(i=_gid, init=init, align=True, codegen=al) if debug_mode: s.append('printf("%i initiated last write from cache position %i+%i=%i to global id %i+%i+%i=%i.\\n", {}, {},{},{},{}, {},{},{});'.format( local_id[0], @@ -802,15 +889,18 @@ class DirectionalRemeshKernel(KernelCodeGenerator): @staticmethod def autotune(cl_env, precision, direction, scalar_cfl, remesh_kernel, remesh_criteria_eps, - position, scalars_in, scalars_out, is_inplace, + position, scalars_in, scalars_out, + scalars_in_mesh_info, scalars_out_mesh_info, is_inplace, + autotuner_config, build_opts, force_atomics=False, relax_min_particles=False, dump_src=False, symbolic_mode=False): - check_instance(precision, Precision) - check_instance(position, dict, keys=DiscreteField) + check_instance(position, dict, keys=OpenClArray, values=CodegenStruct) + position_mesh_info = position.values()[0] + position = position.keys()[0] ftype = clTools.dtype_to_ctype(precision) - domain = position.keys()[0].domain + domain = scalars_in.keys()[0].domain dim = domain.dimension if dim<0 or dim>3: @@ -822,7 +912,7 @@ class DirectionalRemeshKernel(KernelCodeGenerator): msg+='there is no minimum particles to be used when using atomic accumulators.' 
raise ValueError(msg) - check_instance(cl_env, OpenClEnvironment): + check_instance(cl_env, OpenClEnvironment) device = cl_env.device context = cl_env.context platform = cl_env.platform @@ -842,23 +932,50 @@ if typegen.platform!=platform or typegen.device!=device: raise ValueError('platform or device mismatch between typegen and cl_env.') - for variables in (position, scalars_in, scalars_out): - for var, var_mesh_info in variables.iteritems(): - if not isinstance(var,DiscreteField): - raise ValueError('{} is not a DiscreteField'.format(var.name)) - if var.backend.kind != Backend.OPENCL: - raise ValueError('{} is not a DiscreteField defined on an OPENCL backend.'.format(var.name)) - if (var.domain != ref_domain): - raise ValueError('{} domain mismatch with reference one.'.format(var.name)) - if not isinstance(var_mesh_info, CodegenStruct): - raise ValueError('{} mesh info is not a CodegenStruct variable.'.format(var.name)) - if not var_mesh_info.known(): - raise ValueError('{} mesh info is not a known CodegenStruct variable.'.format(var.name)) + if (scalars_in.keys()!=scalars_out.keys()): + extra_fields = set(scalars_in.keys()) ^ set(scalars_out.keys()) + print 'Missing input or output for fields:' + for field in extra_fields: + print ' *field {}'.format(field.name) + raise RuntimeError('Unmatched scalar input or output.') + + for scalars, mesh_infos in zip((scalars_in, scalars_out), + (scalars_in_mesh_info, scalars_out_mesh_info)): + for field in scalars.keys(): + dfield = scalars[field] + mesh_info = mesh_infos[field] + if not isinstance(field, Field): + raise ValueError('{} is not a Field.'.format(field.name)) + if not isinstance(dfield,DiscreteField): + msg='{} is not a DiscreteField'.format(dfield.name) + raise ValueError(msg) + if dfield.backend.kind != Backend.OPENCL: + msg='{} is not a DiscreteField defined on an OPENCL backend.' 
+ msg=msg.format(dfield.name) + raise ValueError(msg) + if (dfield.domain != domain): + msg='{} domain mismatch with reference one.'.format(dfield.name) + raise ValueError(msg) + if not isinstance(mesh_info, CodegenStruct): + msg='{} mesh info is not a CodegenStruct.' + msg=msg.format(dfield.name) + raise ValueError(msg) + if not mesh_info.known(): + msg='{} mesh info is not a known CodegenStruct variable.' + msg=msg.format(dfield.name) + raise ValueError(msg) if is_inplace: - assert (scalars_in==scalars_out) - else: - assert (scalars_in.keys()==scalars_out.keys()) - nscalars = [ scalar.nb_components for scalars in scalars_in.keys() ] + for field in scalars_in: + dfield_in = scalars_in[field] + dfield_out = scalars_out[field] + if dfield_in != dfield_out: + msg='Remeshing inplace but input and output discrete Field differs ' + msg+='for {} and {}.'.format(dfield_in.name, dfield_out.name) + raise RuntimeError(msg) + + group_scalars = tuple(dfield.nb_components for dfield in scalars_in.values()) + nfields = len(group_scalars) + nscalars = sum(group_scalars) p_resolution = position_mesh_info['local_mesh']['compute_resolution'].value[:dim] p_lboundary = position_mesh_info['local_mesh']['lboundary'].value[:dim] @@ -866,7 +983,7 @@ class DirectionalRemeshKernel(KernelCodeGenerator): p_ghosts = position_mesh_info['ghosts'].value[:dim] p_dx = position_mesh_info['dx'].value[:dim] - scalars_mesh_info = scalars_in.values()[0] + scalars_mesh_info = scalars_in_mesh_info.values()[0] s_resolution = scalars_mesh_info['local_mesh']['compute_resolution'].value[:dim] s_lboundary = scalars_mesh_info['local_mesh']['lboundary'].value[:dim] s_rboundary = scalars_mesh_info['local_mesh']['rboundary'].value[:dim] @@ -874,32 +991,39 @@ 
scalar_cfl, remesh_kernel) + min_s_ghosts = DirectionalRemeshKernel.scalars_out_cache_ghosts(scalar_cfl, + remesh_kernel) + min_wg_size = DirectionalRemeshKernel.min_wg_size(dim, scalar_cfl, + remesh_kernel) + + min_nparticles = 2 if relax_min_particles else int(2*np.ceil(scalar_cfl)) assert (min_s_ghosts>=1) + assert (min_nparticles>=2) if not cl_env.is_multi_device: if s_lboundary[0] == BoundaryCondition.PERIODIC: - assert p_lboundary[0] == BoundaryCondition.PERIODIC: + assert p_lboundary[0] == BoundaryCondition.PERIODIC assert p_rboundary[0] == BoundaryCondition.PERIODIC assert s_rboundary[0] == BoundaryCondition.PERIODIC min_s_ghosts = 0 - for scalars_mesh_info in self.scalars_in.values()+self.scalars_out.values(): - _s_resolution = scalars_mesh_info['local_mesh']['compute_resolution'].value[:dim] + for scalars_mesh_info in scalars_in_mesh_info.values()\ + +scalars_out_mesh_info.values(): + _s_resolution = scalars_mesh_info['local_mesh']['compute_resolution']\ + .value[:dim] _s_lboundary = scalars_mesh_info['local_mesh']['lboundary'].value[:dim] _s_rboundary = scalars_mesh_info['local_mesh']['rboundary'].value[:dim] _s_dx = scalars_mesh_info['dx'].value[:dim] - assert _s_resolution == s_resolution - assert _s_lboundary == s_lboundary - assert _s_rboundary == s_rboundary - assert _s_dx == s_dx + assert (_s_resolution == s_resolution).all() + assert (_s_lboundary == s_lboundary).all() + assert (_s_rboundary == s_rboundary).all() + assert (_s_dx == s_dx).all() - for scalars_mesh_info in self.scalars_out.values(): + for scalars_mesh_info in scalars_out_mesh_info.values(): _s_ghosts = scalars_mesh_info['ghosts'].value[:dim] if (_s_ghosts[0] < min_s_ghosts): - msg= 'Given boundary condition implies minimum ghosts numbers to be at least {}' - msg+='in current direction for output scalar but only {} ghosts ' + msg= 'Given boundary condition implies minimum ghosts numbers to be at ' + msg+='least {} in current direction for output scalar but only {} ghosts ' msg+='are 
present in the grid for scalar {}.' msg=msg.format(min_s_ghosts, s_ghosts[0], scalar.name) raise ValueError(msg) @@ -941,25 +1065,39 @@ class DirectionalRemeshKernel(KernelCodeGenerator): max_workitem_workload[dim:] = 1 ## Compile time known variables - known_vars = { 'position_mesh_info': position.values()[0] } - i=0 + known_vars = {} + kernel_args = [] + kernel_args_mapping = {} + + varname='position' + kernel_args_mapping[varname+'_base'] = (0, cl.MemoryObjectHolder) + kernel_args_mapping[varname+'_offset'] = (1, np.uint64) + kernel_args.append(position.base_data) + kernel_args.append(np.uint64(position.offset)) + known_vars[varname+'_mesh_info'] = position_mesh_info + + k=0 if is_inplace: - for (scalar,mesh_info) in self.scalars_in.iteritems(): - for _ in xrange(scalar.nb_components): - varname='S{}_inout'.format(i) - known_vars[varname] = mesh_info - i+=1 + it = zip(('_inout',), (scalars_in,),(scalars_in_mesh_info,)) else: - for scalar in scalars_in.keys(): - mesh_info_in = scalars_in[scalar] - mesh_info_out = scalars_out[scalar] - for _ in xrange(scalar.nb_components): - varname='S{}_'.format(i) - known_vars[varname+'in'] = mesh_info_in - known_vars[varname+'out'] = mesh_info_out - i+=1 - assert i==nscalars - + it = zip(('_in','_out'), + (scalars_in, scalars_out), + (scalars_in_mesh_info, scalars_out_mesh_info)) + for (postfix, scalars, mesh_infos) in it: + for i,scalar in enumerate(scalars.keys()): + dscalar = scalars[scalar] + mesh_info = mesh_infos[scalar] + assert dscalar.nb_components == group_scalars[i] + base_varname='S{}'.format(i) + for j in xrange(dscalar.nb_components): + varname='{}_{}{}'.format(base_varname,j,postfix) + kernel_args_mapping[varname+'_base'] = (2+2*k+0, cl.MemoryObjectHolder) + kernel_args_mapping[varname+'_offset'] = (2+2*k+1, np.uint64) + kernel_args.append(dscalar[j].base_data) + kernel_args.append(np.uint64(dscalar[j].offset)) + k+=1 + known_vars[base_varname+postfix+'_mesh_info'] = mesh_info + assert k==(2-is_inplace)*nscalars 
## kernel generator def kernel_generator(work_size, work_load, local_work_size, @@ -969,45 +1107,49 @@ class DirectionalRemeshKernel(KernelCodeGenerator): return_codegen = False, **kargs): + _nparticles = extra_parameters['nparticles'] _use_atomics = extra_parameters['use_atomics'] - if (not _use_atomics) and (nparticles < min_nparticles): - if not relax_min_particles: + if (not _use_atomics) and (_nparticles < min_nparticles): + msg='Insufficient number of particles, min={}, got {}.' + msg=msg.format(min_nparticles, _nparticles) + raise KernelGenerationError(msg) ## Compile time known variables known_vars['local_size'] = local_work_size[:dim] ## CodeGenerator - name = DirectionalRemeshKernel.codegen_name(work_dim, + name = DirectionalRemeshKernel.codegen_name(dim, remesh_kernel, ftype, - nparticles, nscalars, remesh_criteria_eps, + _nparticles, nscalars, remesh_criteria_eps, _use_atomics, is_inplace) - codegen = DirectionalRemeshKernel(typegen=typegen, work_dim=dim, ftype=ftype, - nparticles=nparticles, nscalars=nscalars, + codegen = DirectionalRemeshKernel(typegen=typegen, + work_dim=dim, ftype=ftype, + nparticles=_nparticles, nscalars=nscalars, sboundary=sboundary, is_inplace=is_inplace, scalar_cfl=scalar_cfl, remesh_kernel=remesh_kernel, + group_scalars=group_scalars, remesh_criteria_eps=remesh_criteria_eps, use_atomics = _use_atomics, symbolic_mode = symbolic_mode, debug_mode = False, - known_vars = known_vars): - - global_size = codegen.get_global_size(work_size=work_size, local_work_size=local_work_size, - work_load=work_load) - + known_vars = known_vars) + global_size = codegen.get_global_size(work_size=work_size, + local_work_size=local_work_size, work_load=work_load) + usable_cache_bytes_per_wg = clCharacterize.usable_local_mem_size(device) if codegen.required_workgroup_cache_size(local_work_size[:dim])[2] > \ usable_cache_bytes_per_wg: raise KernelGenerationError('Insufficient device cache.') ## generate source code and build kernel - src = 
codegen.__str__() - src_hash = hashlib.sha512(src).hexdigest() - prg = cl_env.build_raw_src(src, build_opts, + src = codegen.__str__() + src_hash = hashlib.sha512(src).hexdigest() + prg = cl_env.build_raw_src(src, build_opts, kernel_name=name, force_verbose=force_verbose, force_debug=force_debug) - kernel = prg.all_kernels()[0] + kernel = prg.all_kernels()[0] if return_codegen: return (codegen, kernel, kernel_args, src_hash, global_size) @@ -1015,33 +1157,25 @@ class DirectionalRemeshKernel(KernelCodeGenerator): return (kernel, kernel_args, src_hash, global_size) ## Kernel Autotuner - name = DirectionalAdvectionKernel.codegen_name(ftype, False, rk_scheme, 0, - cache_ghosts) + name = DirectionalRemeshKernel.codegen_name(dim, + remesh_kernel, ftype, + 1, nscalars, remesh_criteria_eps, + force_atomics, is_inplace) autotuner = KernelAutotuner(name=name, work_dim=dim, local_work_dim=1, build_opts=build_opts, autotuner_config=autotuner_config) autotuner.add_filter('1d_shape_min', autotuner.min_workitems_per_direction) - autotuner.register_extra_parameter('is_cached', caching_options) - autotuner.register_extra_parameter('nparticles', nparticles_options) + autotuner.register_extra_parameter('use_atomics', use_atomics_options) + autotuner.register_extra_parameter('nparticles', nparticles_options) autotuner.enable_variable_workitem_workload( max_workitem_workload=max_workitem_workload) - - kernel_args = [precision(1), - velocity[direction].base_data, np.uint64(velocity[direction].offset), - position.base_data, np.uint64(position.offset)] - kernel_args_mapping= { - 'dt': (0, precision), - 'velocity_base': (1, cl.MemoryObjectHolder), - 'velocity_offset': (2, np.uint64), - 'position_base': (3, cl.MemoryObjectHolder), - 'position_offset': (4, np.uint64) - } (gwi, lwi, stats, work_load, extra_params) = autotuner.bench(typegen=typegen, - work_size=work_size, kernel_args=kernel_args, + work_size=work_size, kernel_generator=kernel_generator, + kernel_args=kernel_args, 
dump_src=dump_src, min_local_size=min_local_size, - get_max_global_size=DirectionalAdvectionKernel.get_max_global_size) + get_max_global_size=DirectionalRemeshKernel.get_max_global_size) (codegen, kernel, kernel_args, src_hash, global_size) = kernel_generator( work_size=work_size, work_load=work_load, @@ -1090,7 +1224,7 @@ if __name__ == '__main__': use_atomics=True, is_inplace=False, symbolic_mode=False, - debug_mode=True, + debug_mode=False, sboundary=(BoundaryCondition.PERIODIC, BoundaryCondition.PERIODIC), known_vars=dict( S0_in_mesh_info=smesh_info, diff --git a/hysop/backend/device/kernel_autotuner.py b/hysop/backend/device/kernel_autotuner.py index 1545675c639104e36971382efcfabcad37fff729..f15f9bdb0e7266ffc1047b310af2eddcb29176cc 100644 --- a/hysop/backend/device/kernel_autotuner.py +++ b/hysop/backend/device/kernel_autotuner.py @@ -425,7 +425,7 @@ class KernelAutotuner(object): **kargs) except KernelGenerationError: pruned_count += 1 - raise + continue global_size = np.asarray(global_size) gwi = tuple(global_size) diff --git a/hysop/backend/device/opencl/opencl_operator.py b/hysop/backend/device/opencl/opencl_operator.py index ae1215c551f2980d5051c8bb4610b16f0322ce89..1f8883d3303881f918253518bef1c2a3267b0e74 100644 --- a/hysop/backend/device/opencl/opencl_operator.py +++ b/hysop/backend/device/opencl/opencl_operator.py @@ -210,7 +210,7 @@ class OpenClOperator(ComputationalGraphOperator): input_mesh_info[field] = MeshInfoStruct.create_from_mesh( name='{}_in_field_mesh_info'.format(field.name), cl_env=self.cl_env, - mesh=mesh) + mesh=mesh)[1] output_mesh_info = {} for field, topo in self.output_vars.iteritems(): @@ -219,7 +219,7 @@ class OpenClOperator(ComputationalGraphOperator): output_mesh_info[field] = MeshInfoStruct.create_from_mesh( name='{}_out_field_mesh_info'.format(field.name), cl_env=self.cl_env, - mesh=mesh) + mesh=mesh)[1] self.input_mesh_info = input_mesh_info self.output_mesh_info = output_mesh_info diff --git 
a/hysop/backend/device/opencl/operator/directional/advection_dir.py b/hysop/backend/device/opencl/operator/directional/advection_dir.py index 04ad728fe3e8cd6d6bdea61ad4bfea72e9a55f09..b2a3cebbdfa7e6d596e3fcd7971105612bedc29e 100644 --- a/hysop/backend/device/opencl/operator/directional/advection_dir.py +++ b/hysop/backend/device/opencl/operator/directional/advection_dir.py @@ -12,10 +12,14 @@ from hysop.backend.device.kernel_config import KernelConfig from hysop.backend.device.opencl.operator.directional.opencl_directional_operator import OpenClDirectionalOperator from hysop.tools.types import InstanceOf +from hysop.numerics.remesh.remesh import RemeshKernel from hysop.methods import Interpolation, Precision, TimeIntegrator, Remesh from hysop.numerics.odesolvers.runge_kutta import ExplicitRungeKutta, Euler, RK2, RK3, RK4 -from hysop.backend.device.codegen.kernels.directional_advection import DirectionalAdvectionKernel +from hysop.backend.device.codegen.kernels.directional_advection import \ + DirectionalAdvectionKernel +from hysop.backend.device.codegen.kernels.directional_remesh import \ + DirectionalRemeshKernel from hysop.core.memory.memory_request import MemoryRequest from hysop.backend.device.codegen.structs.mesh_info import MeshInfoStruct @@ -24,19 +28,21 @@ class OpenClDirectionalAdvection(OpenClDirectionalOperator): __default_method = { TimeIntegrator: Euler, Interpolation: Interpolation.LINEAR, - Remesh: None, + Remesh: Remesh.L2_1, KernelConfig: KernelConfig() } __available_methods = { TimeIntegrator: InstanceOf(ExplicitRungeKutta), Interpolation: Interpolation.LINEAR, - Remesh: None, + Remesh: (InstanceOf(Remesh), InstanceOf(RemeshKernel)), KernelConfig: InstanceOf(KernelConfig) } @debug - def __init__(self, velocity, advected_fields, advected_fields_out, variables, cfl, + def __init__(self, velocity, + advected_fields, advected_fields_out, variables, velocity_cfl, + force_atomics=False, relax_min_particles=False, remesh_criteria_eps=None, **kwds): """ 
Particular advection of field(s) in a given direction, @@ -55,8 +61,23 @@ class OpenClDirectionalAdvection(OpenClDirectionalOperator): Where input fields are remeshed variables: dict Dictionary of continuous fields as keys and topologies as values. - cfl: float + velocity_cfl: float Velocity cfl in given direction. + force_atomics: bool + If set, only use atomic accumulators for remesh. + Default: autotuner will automatically figure out the best strategy + between using atomic operations and remeshing multiple particles at + a time. + relax_min_particles: bool + Relax the minimum particles required for correct accumulation of remeshed + quantities in cache when not using atomic operations. + If this is set to True, the minimum number will be reset to 2. + The minimum number of particles to be remeshed at a time + by a work_item is 2*ceil(scalar_cfl) because of maximum potential + particles superposition after the advection step. + remesh_criteria_eps: int + Minimum number of epsilons (in specified precision) that will trigger + remeshing. By default every non zero value is remeshed. kwds: Extra parameters passed to generated directional operators. 
@@ -71,7 +92,7 @@ class OpenClDirectionalAdvection(OpenClDirectionalOperator): """ check_instance(velocity, Field) - check_instance(advected_fields, list, values=Field) + check_instance(advected_fields, list, values=Field) check_instance(advected_fields_out, list, values=Field) check_instance(variables, dict, keys=Field, values=CartesianDescriptors) assert (len(advected_fields)==len(advected_fields_out)) @@ -79,24 +100,41 @@ class OpenClDirectionalAdvection(OpenClDirectionalOperator): input_vars = { velocity: variables[velocity] } output_vars = {} + is_inplace = True + for field in advected_fields: - input_vars[field] = variables[field] + input_vars[field] = variables[field] for field in advected_fields_out: output_vars[field] = variables[field] - + if input_vars[field] == output_vars[field]: + assert is_inplace, 'Cannot mix in place and out of place remesh.' + else: + is_inplace = False + super(OpenClDirectionalAdvection,self).__init__(input_vars=input_vars, output_vars=output_vars, **kwds) self.velocity = velocity + self.first_scalar = advected_fields[0] self.advected_fields_in = advected_fields self.advected_fields_out = advected_fields_out - self.cfl = cfl + self.velocity_cfl = velocity_cfl + self.is_inplace = is_inplace + + self.force_atomics = force_atomics + self.relax_min_particles = relax_min_particles + self.remesh_criteria_eps = remesh_criteria_eps @debug def handle_method(self,method): super(OpenClDirectionalAdvection,self).handle_method(method) - self.remesh = method.pop(Remesh) + + remesh_kernel = method.pop(Remesh) + if isinstance(remesh_kernel, Remesh): + remesh_kernel = RemeshKernel.from_enum(remesh_kernel) + + self.remesh_kernel = remesh_kernel self.interp = method.pop(Interpolation) self.time_integrator = method.pop(TimeIntegrator) @@ -104,15 +142,43 @@ class OpenClDirectionalAdvection(OpenClDirectionalOperator): def get_field_requirements(self): requirements = super(OpenClDirectionalAdvection, self).get_field_requirements() + direction = 
self.direction + is_inplace = self.is_inplace + velocity = self.velocity + velocity_cfl = self.velocity_cfl v_topo, v_requirements = requirements.get_input_requirement(velocity) v_dx = v_topo.mesh.space_step - advection_ghosts = int(np.ceil(self.cfl * v_dx[self.direction])) - min_ghosts = npw.integer_zeros(v_dx.shape) - min_ghosts[self.direction] = advection_ghosts + advection_ghosts = int(np.ceil(velocity_cfl * v_dx[direction])) + min_velocity_ghosts = npw.integer_zeros(v_dx.shape) + min_velocity_ghosts[self.direction] = advection_ghosts + v_requirements.min_ghosts = min_velocity_ghosts + + scalar = self.first_scalar + s_topo, _ = requirements.get_input_requirement(scalar) + s_dx = s_topo.mesh.space_step + scalar_cfl = advection_ghosts * (v_dx[direction] / s_dx[direction]) + + remesh_ghosts = DirectionalRemeshKernel.scalars_out_cache_ghosts(scalar_cfl, + self.remesh_kernel) + min_scalar_ghosts = npw.integer_zeros(s_dx.shape) + min_scalar_ghosts[self.direction] = remesh_ghosts + + for sfield in self.advected_fields_out: + _s_topo, _s_requirements = requirements.get_output_requirement(sfield) + _s_dx = _s_topo.mesh.space_step + assert (_s_dx == s_dx).all() + _s_requirements.min_ghosts = min_scalar_ghosts + + if is_inplace: + for sfield in self.advected_fields_in: + _s_topo, _s_requirements = requirements.get_input_requirement(sfield) + _s_dx = _s_topo.mesh.space_step + assert (_s_dx == s_dx).all() + _s_requirements.min_ghosts = min_scalar_ghosts - v_requirements.min_ghosts = min_ghosts + self.scalar_cfl = scalar_cfl return requirements @@ -122,35 +188,31 @@ class OpenClDirectionalAdvection(OpenClDirectionalOperator): dvelocity = self.input_discrete_fields[self.velocity] velocity_mesh_info = self.input_mesh_info[self.velocity] - vdx = velocity_mesh_info[1].value['dx'][0] - advection_ghosts = int(npw.ceil(self.cfl * vdx)) - assert (advection_ghosts >= 1) - + # field -> discrete_field dadvected_fields_in = {field: self.input_discrete_fields[field] for field in 
self.advected_fields_in} dadvected_fields_out = {field: self.output_discrete_fields[field] for field in self.advected_fields_out} - fields_in_mesh_info = [ self.input_mesh_info[field] - for field in self.advected_fields_in] - fields_out_mesh_info = [ self.output_mesh_info[field] - for field in self.advected_fields_out] + # field -> dfield_mesh_info + dfields_in_mesh_info = { field : self.input_mesh_info[field] + for field in self.advected_fields_in } + dfields_out_mesh_info = { field : self.output_mesh_info[field] + for field in self.advected_fields_out } - fdx = fields_in_mesh_info[0][1].vars['dx'][0] - xmin = fields_in_mesh_info[0][1].vars['global_mesh'].vars['xmin'][0] - for field in fields_in_mesh_info + fields_out_mesh_info: - assert field[1].vars['dx'][0] == fdx - assert field[1].vars['global_mesh'].vars['xmin'][0] == xmin + fdx = dfields_in_mesh_info.values()[0].vars['dx'][0] + xmin = dfields_in_mesh_info.values()[0].vars['global_mesh'].vars['xmin'][0] + for mesh_info in dfields_in_mesh_info.values() + dfields_out_mesh_info.values(): + assert mesh_info.vars['dx'][0] == fdx + assert mesh_info.vars['global_mesh'].vars['xmin'][0] == xmin self.dvelocity = dvelocity self.dadvected_fields_in = dadvected_fields_in self.dadvected_fields_out = dadvected_fields_out - self.velocity_mesh_info = velocity_mesh_info - self.fields_in_mesh_info = fields_in_mesh_info - self.fields_out_mesh_info = fields_out_mesh_info - - self.advection_ghosts = advection_ghosts + self.velocity_mesh_info = velocity_mesh_info + self.dfields_in_mesh_info = dfields_in_mesh_info + self.dfields_out_mesh_info = dfields_out_mesh_info @debug def get_work_properties(self): @@ -162,7 +224,7 @@ class OpenClDirectionalAdvection(OpenClDirectionalOperator): requests.push_mem_request('position', request) self.position_mesh_info = MeshInfoStruct.create_from_mesh('position_mesh_info', - self.cl_env, mesh) + self.cl_env, mesh)[1] return requests @@ -180,25 +242,20 @@ class 
OpenClDirectionalAdvection(OpenClDirectionalOperator): if (work is None): raise ValueError('work is None.') - # field_on_part = {} - # for field_on_grid in self.advected_fields_in.values(): - # request_id = 'field_on_part_{}'.format(field_on_grid.name) - # field_on_part[field_on_grid] = work.get_buffer(self, request_id) - # self.field_on_part = field_on_part - self.dposition = work.get_buffer(self, 'position') def _collect_kernels(self): - self._collect_advection_kernel() + # self._collect_advection_kernel() + self._collect_remesh_kernel() def _collect_advection_kernel(self): velocity = self.dvelocity position = self.dposition - velocity_mesh_info = self.velocity_mesh_info[1] - position_mesh_info = self.position_mesh_info[1] + velocity_mesh_info = self.velocity_mesh_info + position_mesh_info = self.position_mesh_info - cfl = self.cfl + cfl = self.cfl precision = self.precision time_integrator = self.time_integrator direction = self.direction @@ -226,10 +283,9 @@ class OpenClDirectionalAdvection(OpenClDirectionalOperator): def _collect_remesh_kernel(self): position = self.dposition - velocity_mesh_info = self.velocity_mesh_info[1] - position_mesh_info = self.position_mesh_info[1] + position_mesh_info = self.position_mesh_info - cfl = self.scalar_cfl + scalar_cfl = self.scalar_cfl precision = self.precision time_integrator = self.time_integrator direction = self.direction @@ -237,21 +293,40 @@ class OpenClDirectionalAdvection(OpenClDirectionalOperator): cl_env = self.cl_env build_options = self.build_options() autotuner_config = self.autotuner_config - + + remesh_kernel = self.remesh_kernel + + remesh_criteria_eps = self.remesh_criteria_eps + force_atomics = self.force_atomics + relax_min_particles = self.relax_min_particles + + position = {self.dposition : self.position_mesh_info} + scalars_in = self.dadvected_fields_in + scalars_out = self.dadvected_fields_out + scalars_in_mesh_info = self.dfields_in_mesh_info + scalars_out_mesh_info = self.dfields_out_mesh_info + 
is_inplace = self.is_inplace + (kernel_launcher, kernel_args, kernel_args_mapping, total_work, per_work_statistic, cached_bytes) = \ - DirectionalAdvectionKernel.autotune( - direction=direction, - cfl=cfl, - cl_env=cl_env, + DirectionalRemeshKernel.autotune(cl_env=cl_env, precision=precision, - velocity=velocity, + direction=direction, + scalar_cfl=scalar_cfl, + remesh_kernel=remesh_kernel, + remesh_criteria_eps=remesh_criteria_eps, position=position, - velocity_mesh_info = velocity_mesh_info, - position_mesh_info = position_mesh_info, - time_integrator=time_integrator, + scalars_in=scalars_in, + scalars_out=scalars_out, + scalars_in_mesh_info=scalars_in_mesh_info, + scalars_out_mesh_info=scalars_out_mesh_info, + is_inplace=is_inplace, + autotuner_config=autotuner_config, build_opts=build_options, - autotuner_config=autotuner_config) + force_atomics=force_atomics, + relax_min_particles=relax_min_particles, + dump_src=False, + symbolic_mode=False) @debug diff --git a/hysop/core/graph/computational_graph.py b/hysop/core/graph/computational_graph.py index b0ec596aded896c6ad6d82a1a943c496b6b85255..271a7b68927cebe3593c46eb7938c3b519ca39d1 100644 --- a/hysop/core/graph/computational_graph.py +++ b/hysop/core/graph/computational_graph.py @@ -246,7 +246,6 @@ class ComputationalGraph(ComputationalGraphNode): input_vars = {} output_vars = {} - print 'BUILD' # dictionnary topology -> node that wrote this topology field_write_nodes = {} # dictionnary topology -> list of nodes that reads field:topo diff --git a/hysop/fields/field_requirements.py b/hysop/fields/field_requirements.py index 271dd7c80db071d227808cde5cb2dffcee75e172..22863874d6b712db8191a482139268ebe3bbda84 100644 --- a/hysop/fields/field_requirements.py +++ b/hysop/fields/field_requirements.py @@ -388,7 +388,7 @@ class OperatorFieldRequirements(object): it1 = it.izip_longest((False,), self.iter_output_requirements()) return it.chain(it0, it1) - def get_input_requirement(self, field): + def _get_requirement(self, 
field, field_requirements): """ Get unique requirement and topology descriptor for given field, if it exists. This is a facility for ComputationalGraphOperators to retrieve their unique per field requirements. @@ -398,10 +398,10 @@ class OperatorFieldRequirements(object): raise a RuntimeError. """ check_instance(field, Field) - if field not in self._input_field_requirements: + if field not in field_requirements: msg='No requirements found for field {}.'.format(field.name) raise AttributeError(msg) - freqs = self._input_field_requirements[field].requirements + freqs = field_requirements[field].requirements if len(freqs.keys())>1: msg='Multiple topology descriptors are present for field {}.'.format(field.name) raise RuntimeError(msg) @@ -412,6 +412,11 @@ class OperatorFieldRequirements(object): raise RuntimeError(msg) return (td, next(iter(reqs))) + def get_input_requirement(self, field): + return self._get_requirement(field, self._input_field_requirements) + def get_output_requirement(self, field): + return self._get_requirement(field, self._output_field_requirements) + @debug def build_topologies(self): if self._input_field_requirements: diff --git a/hysop/methods.py b/hysop/methods.py index a658b46cc07fc88830d7b84ab17012904e06f9c0..d3268d89e76abb51eac79859b598e5fdbe05b6ce 100644 --- a/hysop/methods.py +++ b/hysop/methods.py @@ -28,5 +28,4 @@ from hysop.numerics.odesolvers.runge_kutta import TimeIntegrator from hysop.numerics.remesh.remesh import Remesh from hysop.numerics.splitting.strang import StrangOrder - from hysop.backend.device.kernel_config import KernelConfig diff --git a/hysop/numerics/remesh/remesh.py b/hysop/numerics/remesh/remesh.py index 837799e3c40732394b588bbb8bca650b8f087294..bdc0a14c6fbc784478ae0effb5eef818d86a7144 100644 --- a/hysop/numerics/remesh/remesh.py +++ b/hysop/numerics/remesh/remesh.py @@ -1,9 +1,12 @@ from hysop.constants import __VERBOSE__ +from hysop.tools.enum import EnumFactory +from hysop.tools.types import check_instance from 
hysop.numerics.remesh.kernel_generator import Kernel, SymmetricKernelGenerator -class Remesh(object): - pass +Remesh = EnumFactory.create('Remesh', + ['L2_1','L2_2','L4_2','L4_4','L6_4','L6_6','L8_4', + 'L2_1s','L2_2s','L4_2s','L4_4s','L6_4s','L6_6s','L8_4s']) class RemeshKernelGenerator(SymmetricKernelGenerator): def configure(self,n): @@ -23,6 +26,23 @@ class RemeshKernel(Kernel): split_polys=split_polys, no_wrap=True) super(RemeshKernel,self).__init__(**kargs) + + @staticmethod + def from_enum(remesh): + check_instance(remesh, Remesh) + remesh = str(remesh) + assert remesh[0]=='L' + remesh=remesh[1:] + if remesh[-1] == 's': + remesh = remesh[:-1] + split_polys=True + else: + split_polys=False + remesh = [int(x) for x in remesh.split('_')] + assert len(remesh) == 2 + assert remesh[0] >= 2 + assert remesh[1] >= 1 + return RemeshKernel(remesh[0], remesh[1], split_polys) def __str__(self): return 'RemeshKernel(n={}, r={}, split={})'.format(self.n, self.r, self.poly_splitted) diff --git a/hysop/tools/types.py b/hysop/tools/types.py index 3d8ef1e696ebf42dd45d8871495316b9e9ffc5e2..02db6b0bba4855f107f4f77126edf951b50fcb68 100644 --- a/hysop/tools/types.py +++ b/hysop/tools/types.py @@ -40,7 +40,7 @@ def check_instance(val, cls, allow_none=False, **kargs): for k,v in val.iteritems(): if not isinstance(k,key_cls): msg='Key contained in {} is not an instance of {}, got {}.' - msg=msg.format(cls.__name__, k, key_cls.__name__, k.__class__) + msg=msg.format(cls.__name__, key_cls.__name__, k.__class__) raise TypeError(msg) if 'values' in kargs: val_cls = kargs.pop('values') @@ -49,7 +49,7 @@ def check_instance(val, cls, allow_none=False, **kargs): if hasattr(k, 'name'): key=k.name msg='Value contained in {} at key \'{}\' is not an instance of {}, got {}.' - msg=msg.format(cls.__name__, key, val_cls, v.__class__) + msg=msg.format(cls.__name__, k, val_cls, v.__class__) raise TypeError(msg) if kargs: raise RuntimeError('Some arguments were not used ({}).'.format(kargs))