diff --git a/hysop/backend/device/codegen/functions/directional_remesh.py b/hysop/backend/device/codegen/functions/directional_remesh.py index ff439bb0dc387302ef7cd89c6ab7c87148e0b0d4..68defa89f3af24678e34b3f65f4853c580d8cf5f 100644 --- a/hysop/backend/device/codegen/functions/directional_remesh.py +++ b/hysop/backend/device/codegen/functions/directional_remesh.py @@ -1,61 +1,63 @@ from hysop.deps import sm, np, contextlib from hysop.tools.types import check_instance, first_not_None -from hysop.backend.device.opencl.opencl_types import OpenClTypeGen +from hysop.backend.device.opencl.opencl_types import OpenClTypeGen -from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator +from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.device.codegen.base.function_codegen import OpenClFunctionCodeGenerator -from hysop.backend.device.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin -from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict -from hysop.backend.device.codegen.base.statistics import WorkStatistics +from hysop.backend.device.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin +from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict +from hysop.backend.device.codegen.base.statistics import WorkStatistics from hysop.backend.device.codegen.functions.polynomial import PolynomialFunction from hysop.backend.device.codegen.functions.custom_atomics import CustomAtomicFunction from hysop.constants import BoundaryCondition from hysop.numerics.remesh.remesh import RemeshKernel +from hysop.numerics.remesh.kernel_generator import Kernel from hysop.numerics.stencil.stencil_generator import StencilGenerator + class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): def __init__(self, typegen, work_dim, itype, ftype, - nparticles, nscalars, - sboundary, remesh_kernel, - use_atomics, remesh_criteria_eps, - use_short_circuit=None, - known_args=None, - debug_mode=False): - - check_instance(sboundary,tuple,values=BoundaryCondition) - check_instance(remesh_kernel, RemeshKernel) + nparticles, nscalars, + sboundary, remesh_kernel, + use_atomics, remesh_criteria_eps, + use_short_circuit=None, + known_args=None, + debug_mode=False): + + check_instance(sboundary, tuple, values=BoundaryCondition) + check_instance(remesh_kernel, (RemeshKernel, Kernel)) assert remesh_kernel.n % 2 == 0 or remesh_kernel.n == 1 assert remesh_kernel.n > 0 use_short_circuit = first_not_None(use_short_circuit, typegen.use_short_circuit_ops) - is_periodic = (sboundary[0]==BoundaryCondition.PERIODIC) and \ - (sboundary[1]==BoundaryCondition.PERIODIC) + is_periodic = (sboundary[0] == BoundaryCondition.PERIODIC) and \ + (sboundary[1] == BoundaryCondition.PERIODIC) ivtype = typegen.vtype(itype, nparticles) fvtype = typegen.vtype(ftype, nparticles) reqs = self.build_requirements(typegen, work_dim, itype, ftype, - ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, - use_atomics, remesh_criteria_eps) + ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, + use_atomics, remesh_criteria_eps) - (args,basename) = self.build_prototype(reqs, typegen, work_dim, itype, ftype, - ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, - use_atomics, remesh_criteria_eps, is_periodic) + (args, basename) = self.build_prototype(reqs, typegen, work_dim, itype, ftype, + ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, + use_atomics, remesh_criteria_eps, is_periodic) - 
super(DirectionalRemeshFunction,self).__init__(basename=basename, - output=None, typegen=typegen, inline=True, - args=args, known_args=known_args) + super(DirectionalRemeshFunction, self).__init__(basename=basename, + output=None, typegen=typegen, inline=True, + args=args, known_args=known_args) self.update_requirements(reqs) self.work_dim = work_dim - self.itype = itype - self.ftype = ftype + self.itype = itype + self.ftype = ftype self.ivtype = ivtype self.fvtype = fvtype @@ -74,72 +76,70 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): @staticmethod def _printv(ncomponents): - assert ncomponents in [1,2,4,8,16] - if ncomponents==1: + assert ncomponents in [1, 2, 4, 8, 16] + if ncomponents == 1: return '' else: return 'v{}'.format(ncomponents) - def build_prototype(self, reqs, typegen, work_dim, itype, ftype, - ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, - use_atomics, remesh_criteria_eps, is_periodic): + ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, + use_atomics, remesh_criteria_eps, is_periodic): args = ArgDict() - atomic = 'atomic_' if use_atomics else '' - criteria = '{}eps__'.format(remesh_criteria_eps) if (remesh_criteria_eps is not None) else 'full' + atomic = 'atomic_' if use_atomics else '' + criteria = '{}eps__'.format(remesh_criteria_eps) if ( + remesh_criteria_eps is not None) else 'full' - basename = '__{}remesh_{}d__lambda_{}_{}__{}__{}p__{}s__{}'.format( - atomic, work_dim, - remesh_kernel.n, remesh_kernel.r, ftype, - nparticles, nscalars, - criteria) + basename = '__{}remesh_{}d__lambda_{}_{}__{}__{}p__{}s__{}'.format( + atomic, work_dim, + remesh_kernel.n, remesh_kernel.r, ftype, + nparticles, nscalars, + criteria) args['p'] = CodegenVectorClBuiltin(name='p', btype=ftype, dim=nparticles, typegen=typegen, - add_impl_const=True, nl=True) + add_impl_const=True, nl=True) scalars = [] for i in xrange(nscalars): Si = CodegenVectorClBuiltin(name='s{}'.format(i), btype=ftype, dim=nparticles, typegen=typegen, - add_impl_const=True, nl=(i==nscalars-1)) + add_impl_const=True, nl=(i == nscalars-1)) scalars.append(Si) args[Si.name] = Si args['dx'] = CodegenVariable(name='dx', ctype=ftype, typegen=typegen, - add_impl_const=True) + add_impl_const=True) args['inv_dx'] = CodegenVariable(name='inv_dx', ctype=ftype, typegen=typegen, - add_impl_const=True, nl=True) + add_impl_const=True, nl=True) if is_periodic: args['grid_size'] = CodegenVariable(name='grid_size', ctype=itype, typegen=typegen, - add_impl_const=True) + add_impl_const=True) args['line_offset'] = CodegenVariable(name='line_offset', ctype=itype, typegen=typegen, - add_impl_const=True) - args['cache_width'] = CodegenVariable('cache_width', ctype=itype, typegen=typegen) + add_impl_const=True) + args['cache_width'] = CodegenVariable('cache_width', ctype=itype, typegen=typegen) args['cache_ghosts'] = CodegenVariable(name='cache_ghosts', ctype=itype, typegen=typegen, - add_impl_const=True) + add_impl_const=True) args['active'] = CodegenVariable(name='active', ctype='bool', typegen=typegen, - add_impl_const=True, nl=True) - + add_impl_const=True, nl=True) cached_scalars = [] for i in xrange(nscalars): Si = CodegenVariable(name='S{}'.format(i), ctype=ftype, typegen=typegen, - ptr_restrict=True, ptr=True, storage=self._local, - ptr_const=False, add_impl_const=True, nl=True) + ptr_restrict=True, ptr=True, storage=self._local, + ptr_const=False, add_impl_const=True, nl=True) cached_scalars.append(Si) args[Si.name] = Si self.scalars = scalars self.cached_scalars = cached_scalars - 
return (args,basename) - + return (args, basename) def build_requirements(self, typegen, work_dim, itype, ftype, - ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, - use_atomics, remesh_criteria_eps): + ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, + use_atomics, remesh_criteria_eps): reqs = WriteOnceDict() @@ -147,24 +147,24 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): kernel = remesh_kernel if kernel.poly_splitted: self.poly_splitted = True - name='lambda_{}_{}__{}'.format(kernel.n, kernel.r, '{}_{}') + name = 'lambda_{}_{}__{}'.format(kernel.n, kernel.r, '{}_{}') for i, _ in enumerate(kernel.Pt_l): - ilname = name.format(i,'left') - irname = name.format(i,'right') + ilname = name.format(i, 'left') + irname = name.format(i, 'right') Pil = PolynomialFunction(typegen, ftype, nparticles, - kernel.Cl[:,i], ilname, use_fma=True, var='y') + kernel.Cl[:, i], ilname, use_fma=True, var='y') Pir = PolynomialFunction(typegen, ftype, nparticles, - kernel.Cr[:,i], irname, use_fma=True, var='y') - P.append((Pil,Pir)) + kernel.Cr[:, i], irname, use_fma=True, var='y') + P.append((Pil, Pir)) reqs[ilname] = Pil reqs[irname] = Pir else: self.poly_splitted = False - name='lambda_{}_{}__{}'.format(kernel.n, kernel.r, '{}') + name = 'lambda_{}_{}__{}'.format(kernel.n, kernel.r, '{}') for i, _ in enumerate(kernel.Pt): iname = name.format(i) Pi = PolynomialFunction(typegen, ftype, nparticles, - kernel.C[:,i], iname, use_fma=True, var='y') + kernel.C[:, i], iname, use_fma=True, var='y') P.append((Pi,)) reqs[iname] = Pi self.polynomials = P @@ -190,9 +190,9 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): ivtype = s.ivtype fvtype = s.fvtype - nparticles = s.nparticles - nscalars = s.nscalars - sboundary = s.sboundary + nparticles = s.nparticles + nscalars = s.nscalars + sboundary = s.sboundary use_atomics = s.use_atomics use_short_circuit = s.use_short_circuit remesh_criteria_eps = s.remesh_criteria_eps @@ -215,13 +215,14 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): poly_splitted = s.poly_splitted - lb = '[' if (nparticles>1) else '' - rb = ']' if (nparticles>1) else '' + lb = '[' if (nparticles > 1) else '' + rb = ']' if (nparticles > 1) else '' vnf = '{}{}{}'.format(lb, ', '.join('%2.2f' for _ in xrange(nparticles)), rb) vni = '{}{}{}'.format(lb, ', '.join('%i' for _ in xrange(nparticles)), rb) - expand_printf_vector = lambda x: str(x) if (nparticles==1) else ','.join( - '({}).s{}'.format(x, '0123456789abcdef'[i]) - if isinstance(x, str) else x[i] for i in xrange(nparticles)) + + def expand_printf_vector(x): return str(x) if (nparticles == 1) else ','.join( + '({}).s{}'.format(x, '0123456789abcdef'[i]) + if isinstance(x, str) else x[i] for i in xrange(nparticles)) epv = expand_printf_vector @contextlib.contextmanager @@ -234,17 +235,18 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): dtype = tg.np_dtype(ftype) - P = CodegenVariable(name='P', ctype=itype, typegen=tg, - const=True, value=1+(self.kernel.n/2)) + P = CodegenVariable(name='P', ctype=itype, typegen=tg, + const=True, value=1+(self.kernel.n/2)) eps = CodegenVariable(name='eps', ctype=ftype, typegen=tg, - const=True, value=np.finfo(dtype).eps) + const=True, value=np.finfo(dtype).eps) - rp = CodegenVectorClBuiltin(name='rp', btype=ftype, dim=nparticles, typegen=tg) - y = CodegenVectorClBuiltin(name='y', btype=ftype, dim=nparticles, typegen=tg) - ind = CodegenVectorClBuiltin(name='ind', btype=itype, dim=nparticles, typegen=tg) + rp = 
CodegenVectorClBuiltin(name='rp', btype=ftype, dim=nparticles, typegen=tg) + y = CodegenVectorClBuiltin(name='y', btype=ftype, dim=nparticles, typegen=tg) + ind = CodegenVectorClBuiltin(name='ind', btype=itype, dim=nparticles, typegen=tg) find = CodegenVectorClBuiltin(name='find', btype=ftype, dim=nparticles, typegen=tg) - vone = CodegenVectorClBuiltin(name='one', btype=ftype, dim=nparticles, typegen=tg, value=(1,)*nparticles) - + vone = CodegenVectorClBuiltin(name='one', btype=ftype, + dim=nparticles, typegen=tg, value=(1,)*nparticles) + if debug_mode: tst0 = CodegenVectorClBuiltin(name='tst0', btype=ftype, dim=nparticles, typegen=tg) tst1 = CodegenVectorClBuiltin(name='tst1', btype=ftype, dim=nparticles, typegen=tg) @@ -253,7 +255,7 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): wl = CodegenVectorClBuiltin(name='Wl', btype=ftype, dim=nparticles, typegen=tg) wr = CodegenVectorClBuiltin(name='Wr', btype=ftype, dim=nparticles, typegen=tg) w = CodegenVectorClBuiltin(name='W', btype=ftype, dim=nparticles, typegen=tg) - weights = (wl,wr,w) + weights = (wl, wr, w) else: w = CodegenVectorClBuiltin(name='W', btype=ftype, dim=nparticles, typegen=tg) weights = (w,) @@ -263,7 +265,7 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): if (remesh_criteria_eps is not None): eps.declare(s) s.jumpline() - + if debug_mode: s.decl_aligned_vars(tst0, tst1) s.decl_aligned_vars(rp, find, ind, y) @@ -280,10 +282,11 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): s.append('{} = {rp} - floor({rp});'.format(tst1, rp=rp)) with self._ordered_wi_execution_(barrier=False): code = 'printf("%lu p={vnf}, rp={vnf}, floor(rp)={vnf}, rp-floor(rp)={vnf}, ind={vni}, y={vnf}, s0={vnf}.\\n", {}, {}, {}, {}, {}, {}, {}, {});'.format( - 'get_local_id(0)', epv(pos), epv(rp), epv(tst0), epv(tst1), epv(ind), epv(y), epv(scalars[0]), - vnf=vnf, vni=vni) + 'get_local_id(0)', epv(pos), epv(rp), epv(tst0), epv( + tst1), epv(ind), epv(y), epv(scalars[0]), + vnf=vnf, vni=vni) s.append(code) - y.affect(s, init='{}-{}'.format(vone,y)) + y.affect(s, init='{}-{}'.format(vone, y)) ind.affect(s, init='{} - {} - {}'.format(ind, P, line_offset)) if debug_mode: s.barrier(_local=True) @@ -299,33 +302,35 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): with if_thread_active(): s.append('{} += 1;'.format(ind)) with s._align_() as al: - for wj,Pij,yj in zip(weights, Pi, (y,'1-{}'.format(y))): + for wj, Pij, yj in zip(weights, Pi, (y, '1-{}'.format(y))): wj.affect(al, align=True, init=Pij(y=yj)) if poly_splitted: - if nparticles>1: + if nparticles > 1: w.affect(al, align=True, init='select({}, {}, ({}<{}))'.format( - wr, wl, y, tg.dump(0.5))) + wr, wl, y, tg.dump(0.5))) else: w.affect(al, align=True, init='convert_{fvtype}({}<{})*{} + convert_{fvtype}({}>={})*{}'.format( - y,tg.dump(0.5), wl, y, tg.dump(0.5), wr, fvtype=fvtype)) + y, tg.dump(0.5), wl, y, tg.dump(0.5), wr, fvtype=fvtype)) for iscal, (cached_scalar, scalar) in enumerate(zip(cached_scalars, scalars)): - if nparticles>1: - comment='Remeshing scalar {}, {} particles at a time.'.format(iscal, nparticles) + if nparticles > 1: + comment = 'Remeshing scalar {}, {} particles at a time.'.format( + iscal, nparticles) s.jumpline() s.comment(comment) criterias = [] with s._block_(): for ipart in xrange(nparticles): - cache_idx='{}+{}'.format(cache_ghosts, ind[ipart]) - val = '{}*{}'.format(w[ipart],scalar[ipart]) + cache_idx = '{}+{}'.format(cache_ghosts, ind[ipart]) + val = '{}*{}'.format(w[ipart], scalar[ipart]) if 
(remesh_criteria_eps is not None): criteria = 'fabs({}) > {}*eps'.format(val, remesh_criteria_eps) criterias.append(criteria) if use_atomics: atomic_add = s.reqs['atomic_add'] - atom_add = atomic_add(p='{}+{}'.format(cached_scalar,cache_idx), val=val) + atom_add = atomic_add( + p='{}+{}'.format(cached_scalar, cache_idx), val=val) if (remesh_criteria_eps is not None): if use_short_circuit: code = '({}) && ({},true);'.format(criteria, atom_add) @@ -337,22 +342,23 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): inplace_add = '{} += {}'.format(cached_scalar[cache_idx], val) if (remesh_criteria_eps is not None): if use_short_circuit: - code = '({}) && (({}),true);'.format(criteria, inplace_add) + code = '({}) && (({}),true);'.format( + criteria, inplace_add) else: code = 'if ({}) {{ {}; }}'.format(criteria, inplace_add) else: code = '{};'.format(inplace_add) s.append(code) if debug_mode: - val='{}*{}'.format(w,scalars[0]) - cache_idx='{}+{}'.format(cache_ghosts, ind[0]) - + val = '{}*{}'.format(w, scalars[0]) + cache_idx = '{}+{}'.format(cache_ghosts, ind[0]) + if poly_splitted: printf = 'printf("BATCH {}: %lu remeshed {vnf} at idx={vni} with Wl={vnf}, Wr={vnf}, W={vnf}, cond={vni}.\\n",{},{},{},{},{},{},{});'.format( - i, 'get_local_id(0)', epv(val), epv(ind), epv(wl), epv(wr), epv(w), epv('(y<0.5f)'), vnf=vnf, vni=vni) + i, 'get_local_id(0)', epv(val), epv(ind), epv(wl), epv(wr), epv(w), epv('(y<0.5f)'), vnf=vnf, vni=vni) else: printf = 'printf("BATCH {}: %lu remeshed {vnf} at idx {vni} with W({vnf})={vnf}, new value is S0[{vni}]={vnf}.\\n",{},{},{},{},{},{},{});'.format( - i, 'get_local_id(0)', epv(val), epv(ind), epv(y), epv(w), cache_idx, epv(cached_scalars[0][cache_idx]), vni=vni, vnf=vnf) + i, 'get_local_id(0)', epv(val), epv(ind), epv(y), epv(w), cache_idx, epv(cached_scalars[0][cache_idx]), vni=vni, vnf=vnf) with s._first_wg_execution_(): if criterias: with s._if_(' || '.join(map(lambda x: '({})'.format(x), criterias))): @@ -370,6 +376,7 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): if use_atomics: s.barrier(_local=True) + if __name__ == '__main__': from hysop.backend.device.codegen.base.test import _test_typegen from hysop.numerics.remesh.remesh import RemeshKernel @@ -386,7 +393,7 @@ if __name__ == '__main__': remesh_criteria_eps = None drf = DirectionalRemeshFunction(tg, work_dim, 'int', tg.fbtype, - nparticles, nscalars, - (BoundaryCondition.PERIODIC,BoundaryCondition.PERIODIC), - kernel, use_atomics, remesh_criteria_eps, debug_mode=False) + nparticles, nscalars, + (BoundaryCondition.PERIODIC, BoundaryCondition.PERIODIC), + kernel, use_atomics, remesh_criteria_eps, debug_mode=False) drf.edit() diff --git a/hysop/backend/device/codegen/kernels/directional_remesh.py b/hysop/backend/device/codegen/kernels/directional_remesh.py index e28b37f828e074427853760b4ec10021fe69058d..9c1a437ac945caafa4301a7d9550efc2e6b9069f 100644 --- a/hysop/backend/device/codegen/kernels/directional_remesh.py +++ b/hysop/backend/device/codegen/kernels/directional_remesh.py @@ -11,23 +11,24 @@ from hysop.constants import DirectionLabels, BoundaryCondition, Backend, Precisi from hysop.core.arrays.all import OpenClArray from hysop.numerics.remesh.remesh import RemeshKernel +from hysop.numerics.remesh.kernel_generator import Kernel from hysop.fields.continuous_field import Field from hysop.fields.discrete_field import DiscreteScalarFieldView -from hysop.backend.device.opencl import cl, clTools, clCharacterize -from hysop.backend.device.opencl.opencl_env import OpenClEnvironment 
-from hysop.backend.device.opencl.opencl_types import OpenClTypeGen +from hysop.backend.device.opencl import cl, clTools, clCharacterize +from hysop.backend.device.opencl.opencl_env import OpenClEnvironment +from hysop.backend.device.opencl.opencl_types import OpenClTypeGen from hysop.backend.device.opencl.opencl_array_backend import OpenClArrayBackend -from hysop.backend.device.codegen import CodeGeneratorWarning -from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict -from hysop.backend.device.codegen.base.statistics import WorkStatistics -from hysop.backend.device.codegen.base.variables import CodegenStruct -from hysop.backend.device.codegen.structs.mesh_info import MeshBaseStruct, MeshInfoStruct +from hysop.backend.device.codegen import CodeGeneratorWarning +from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict +from hysop.backend.device.codegen.base.statistics import WorkStatistics +from hysop.backend.device.codegen.base.variables import CodegenStruct +from hysop.backend.device.codegen.structs.mesh_info import MeshBaseStruct, MeshInfoStruct from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.device.codegen.base.kernel_codegen import KernelCodeGenerator -from hysop.backend.device.codegen.base.variables import CodegenVariable, \ - CodegenVectorClBuiltin, CodegenArray +from hysop.backend.device.codegen.base.variables import CodegenVariable, \ + CodegenVectorClBuiltin, CodegenArray from hysop.backend.device.codegen.functions.directional_remesh import DirectionalRemeshFunction @@ -36,23 +37,24 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): @staticmethod def codegen_name(work_dim, - remesh_kernel, ftype, - nparticles, nscalars, - remesh_criteria_eps, - use_atomics, is_inplace): + remesh_kernel, ftype, + nparticles, nscalars, + remesh_criteria_eps, + use_atomics, is_inplace): inplace = 'inplace_' if is_inplace else '' - atomic = 'atomic_' if use_atomics else '' - criteria = '{}eps__'.format(remesh_criteria_eps) if (remesh_criteria_eps is not None) else 'full' + atomic = 'atomic_' if use_atomics else '' + criteria = '{}eps__'.format(remesh_criteria_eps) if ( + remesh_criteria_eps is not None) else 'full' return 'directional_{}{}remesh_{}d__lambda_{}_{}__{}__{}p__{}s__{}'.format( - inplace, atomic, work_dim, - remesh_kernel.n, remesh_kernel.r, ftype, - nparticles, nscalars, - criteria) + inplace, atomic, work_dim, + remesh_kernel.n, remesh_kernel.r, ftype, + nparticles, nscalars, + criteria) @classmethod def scalars_out_cache_ghosts(cls, scalar_cfl, remesh_kernel): assert scalar_cfl > 0.0, 'cfl <= 0.0' - assert remesh_kernel.n >= 1 , 'Bad remeshing kernel.' + assert remesh_kernel.n >= 1, 'Bad remeshing kernel.' if remesh_kernel.n > 1: assert remesh_kernel.n % 2 == 0, 'Odd remeshing kernel moments.' 
min_ghosts = int(1+npw.floor(scalar_cfl)+remesh_kernel.n/2) @@ -79,21 +81,21 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): Return global_work_size from effective work_size and given local_work_size global_work_size will be a multiple of local_work_size """ - work_dim = self.work_dim - work_load = [1]*work_dim if (work_load is None) else work_load + work_dim = self.work_dim + work_load = [1]*work_dim if (work_load is None) else work_load - work_size = np.asarray(work_size) - work_load = np.asarray(work_load) + work_size = np.asarray(work_size) + work_load = np.asarray(work_load) local_work_size = np.asarray(local_work_size) nparticles = self.nparticles - for i in xrange(1,work_dim): + for i in xrange(1, work_dim): assert (local_work_size[i] == 1), 'local_work_size error!' if 'local_size' in self.known_vars: assert (self.known_vars['local_size'] == local_work_size[:work_dim]).all(),\ - 'local_work_size mismatch!' + 'local_work_size mismatch!' max_global_size = self.get_max_global_size(work_size, work_load, nparticles) global_size = ((max_global_size+local_work_size-1)/local_work_size) * local_work_size @@ -105,13 +107,13 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): """ Return a tuple of required (static,dynamic,total) cache bytes per workgroup """ - work_dim = self.work_dim - ftype = self.ftype - flt_bytes = self.typegen.FLT_BYTES[ftype] + work_dim = self.work_dim + ftype = self.ftype + flt_bytes = self.typegen.FLT_BYTES[ftype] local_work_size = np.asarray(local_work_size) - sc,dc = 0,0 + sc, dc = 0, 0 count = self.nscalars*(self.nparticles*local_work_size[0]+2*self.min_ghosts) if self.local_size_known: assert (self.known_vars['local_size'] == local_work_size[:work_dim]).all() @@ -123,28 +125,27 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): dc *= flt_bytes tc = sc+dc - return (sc,dc,tc) - + return (sc, dc, tc) def __init__(self, typegen, work_dim, ftype, - nparticles, nscalars, sboundary, is_inplace, - scalar_cfl, remesh_kernel, - use_short_circuit=None, - unroll_loops=None, - group_scalars=None, - remesh_criteria_eps=None, - use_atomics = False, - symbolic_mode = False, - debug_mode = False, - tuning_mode = False, - known_vars = None): - - assert work_dim>0 and work_dim<=3 - assert nscalars>0 - assert nparticles in [1,2,4,8,16] - check_instance(sboundary,tuple,values=BoundaryCondition) - check_instance(remesh_kernel, RemeshKernel) - + nparticles, nscalars, sboundary, is_inplace, + scalar_cfl, remesh_kernel, + use_short_circuit=None, + unroll_loops=None, + group_scalars=None, + remesh_criteria_eps=None, + use_atomics=False, + symbolic_mode=False, + debug_mode=False, + tuning_mode=False, + known_vars=None): + + assert work_dim > 0 and work_dim <= 3 + assert nscalars > 0 + assert nparticles in [1, 2, 4, 8, 16] + check_instance(sboundary, tuple, values=BoundaryCondition) + check_instance(remesh_kernel, (RemeshKernel, Kernel)) + use_short_circuit = first_not_None(use_short_circuit, typegen.use_short_circuit_ops) unroll_loops = first_not_None(unroll_loops, typegen.unroll_loops) @@ -159,11 +160,11 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): assert sboundary[0] in [BoundaryCondition.PERIODIC, BoundaryCondition.NONE] assert sboundary[1] in [BoundaryCondition.PERIODIC, BoundaryCondition.NONE] - is_periodic = (sboundary[0]==BoundaryCondition.PERIODIC \ - and sboundary[1]==BoundaryCondition.PERIODIC) + is_periodic = (sboundary[0] == BoundaryCondition.PERIODIC + and sboundary[1] == BoundaryCondition.PERIODIC) if is_periodic: - 
msg='Local periodic boundary have been deprecated, use BoundaryCondition.NONE instead.' + msg = 'Local periodic boundary have been deprecated, use BoundaryCondition.NONE instead.' raise RuntimeError(msg) known_vars = known_vars or dict() @@ -174,58 +175,58 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): vitype = typegen.vtype(itype, nparticles) name = DirectionalRemeshKernelGenerator.codegen_name(work_dim, - remesh_kernel, ftype, - nparticles, nscalars, remesh_criteria_eps, - use_atomics, is_inplace) + remesh_kernel, ftype, + nparticles, nscalars, remesh_criteria_eps, + use_atomics, is_inplace) kernel_reqs = self.build_requirements(typegen, work_dim, itype, ftype, - sboundary, nparticles, nscalars, nfields, group_scalars, - symbolic_mode, - remesh_criteria_eps, use_atomics, remesh_kernel, - use_short_circuit, known_vars, debug_mode) + sboundary, nparticles, nscalars, nfields, group_scalars, + symbolic_mode, + remesh_criteria_eps, use_atomics, remesh_kernel, + use_short_circuit, known_vars, debug_mode) kernel_args = self.gen_kernel_arguments(typegen, work_dim, itype, ftype, - nparticles, nscalars, nfields, group_scalars, local_size_known, is_inplace, - debug_mode, kernel_reqs, known_vars, symbolic_mode) - - super(DirectionalRemeshKernelGenerator,self).__init__( - name=name, - typegen=typegen, - work_dim=work_dim, - kernel_args=kernel_args, - known_vars=known_vars, - vec_type_hint=ftype, - symbolic_mode=symbolic_mode) + nparticles, nscalars, nfields, group_scalars, local_size_known, is_inplace, + debug_mode, kernel_reqs, known_vars, symbolic_mode) + + super(DirectionalRemeshKernelGenerator, self).__init__( + name=name, + typegen=typegen, + work_dim=work_dim, + kernel_args=kernel_args, + known_vars=known_vars, + vec_type_hint=ftype, + symbolic_mode=symbolic_mode) self.update_requirements(kernel_reqs) - self.min_ghosts = self.scalars_out_cache_ghosts(scalar_cfl, remesh_kernel) - self.itype = itype - self.ftype = ftype - self.vitype = vitype - self.vftype = vftype - self.work_dim = work_dim - self.sboundary = sboundary - self.nparticles = nparticles - self.nscalars = nscalars - self.nfields = nfields - self.group_scalars = group_scalars + self.min_ghosts = self.scalars_out_cache_ghosts(scalar_cfl, remesh_kernel) + self.itype = itype + self.ftype = ftype + self.vitype = vitype + self.vftype = vftype + self.work_dim = work_dim + self.sboundary = sboundary + self.nparticles = nparticles + self.nscalars = nscalars + self.nfields = nfields + self.group_scalars = group_scalars self.local_size_known = local_size_known - self.is_inplace = is_inplace - self.use_atomics = use_atomics + self.is_inplace = is_inplace + self.use_atomics = use_atomics self.use_short_circuit = use_short_circuit - self.unroll_loops = unroll_loops - self.remesh_kernel = remesh_kernel - self.debug_mode = debug_mode - self.tuning_mode = tuning_mode + self.unroll_loops = unroll_loops + self.remesh_kernel = remesh_kernel + self.debug_mode = debug_mode + self.tuning_mode = tuning_mode self.gencode() - #self.edit() + # self.edit() def build_requirements(self, typegen, work_dim, itype, ftype, - sboundary, nparticles, nscalars, nfields, group_scalars, symbolic_mode, - remesh_criteria_eps, use_atomics, remesh_kernel, use_short_circuit, - known_vars, debug_mode): + sboundary, nparticles, nscalars, nfields, group_scalars, symbolic_mode, + remesh_criteria_eps, use_atomics, remesh_kernel, use_short_circuit, + known_vars, debug_mode): reqs = WriteOnceDict() vsize = upper_pow2_or_3(work_dim) @@ -239,49 +240,49 @@ class 
DirectionalRemeshKernelGenerator(KernelCodeGenerator): # without atomics we can only remesh on particle at a time nparticles_remeshed = nparticles if use_atomics else 1 reqs['remesh'] = DirectionalRemeshFunction(typegen=typegen, work_dim=work_dim, - itype=itype, ftype=ftype, nparticles=nparticles_remeshed, nscalars=nscalars, - sboundary=sboundary, remesh_kernel=remesh_kernel, use_atomics=use_atomics, - remesh_criteria_eps=remesh_criteria_eps, debug_mode=debug_mode, - use_short_circuit=use_short_circuit) + itype=itype, ftype=ftype, nparticles=nparticles_remeshed, nscalars=nscalars, + sboundary=sboundary, remesh_kernel=remesh_kernel, use_atomics=use_atomics, + remesh_criteria_eps=remesh_criteria_eps, debug_mode=debug_mode, + use_short_circuit=use_short_circuit) return reqs def gen_kernel_arguments(self, typegen, work_dim, itype, ftype, - nparticles, nscalars, nfields, group_scalars, local_size_known, is_inplace, - debug_mode, kernel_reqs, known_vars, symbolic_mode): + nparticles, nscalars, nfields, group_scalars, local_size_known, is_inplace, + debug_mode, kernel_reqs, known_vars, symbolic_mode): kargs = ArgDict() mesh_dim = upper_pow2_or_3(work_dim) self.position, self.position_strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='position', - known_vars=known_vars, symbolic_mode=symbolic_mode, - storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, - ptr_restrict=True, const=True) + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, + ptr_restrict=True, const=True) - scalars_data_in = [] + scalars_data_in = [] scalars_strides_in = [] for i in xrange(nfields): - args_in = [] + args_in = [] strides_in = [] for j in xrange(group_scalars[i]): if is_inplace: - arg, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='S{}_{}_inout'.format(i,j), - known_vars=known_vars, symbolic_mode=symbolic_mode, - storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, - const=False, ptr_restrict=True) + arg, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='S{}_{}_inout'.format(i, j), + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, + const=False, ptr_restrict=True) else: - arg, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='S{}_{}_in'.format(i,j), - known_vars=known_vars, symbolic_mode=symbolic_mode, - storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, - const=True, ptr_restrict=True) + arg, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='S{}_{}_in'.format(i, j), + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, + const=True, ptr_restrict=True) args_in.append(arg) strides_in.append(strides) scalars_data_in.append(tuple(args_in)) scalars_strides_in.append(tuple(strides_in)) - scalars_data_in = tuple(scalars_data_in) + scalars_data_in = tuple(scalars_data_in) scalars_strides_in = tuple(scalars_strides_in) if is_inplace: - scalars_data_out = scalars_data_in + scalars_data_out = scalars_data_in scalars_strides_out = scalars_strides_in else: scalars_data_out = [] @@ -290,10 +291,10 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): args_out = [] strides_out = [] for j in xrange(group_scalars[i]): - arg, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='S{}_{}_out'.format(i,j), - known_vars=known_vars, symbolic_mode=symbolic_mode, - 
storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, - const=False, ptr_restrict=True) + arg, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='S{}_{}_out'.format(i, j), + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, + const=False, ptr_restrict=True) args_out.append(arg) strides_out.append(strides) scalars_data_out.append(tuple(args_out)) @@ -301,60 +302,60 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): scalars_data_out = tuple(scalars_data_out) scalars_strides_out = tuple(scalars_strides_out) - self.scalars_data_in = scalars_data_in + self.scalars_data_in = scalars_data_in self.scalars_data_out = scalars_data_out self.scalars_strides_in = scalars_strides_in self.scalars_strides_out = scalars_strides_out - #if debug_mode: - #kargs['dbg0'] = CodegenVariable(storage=self._global,name='dbg0',ctype=itype, - #typegen=typegen, ptr_restrict=True,ptr=True,const=False,add_impl_const=True) - #kargs['dbg1'] = CodegenVariable(storage=self._global,name='dbg1',ctype=itype, - #typegen=typegen, ptr_restrict=True,ptr=True,const=False,add_impl_const=True) + # if debug_mode: + # kargs['dbg0'] = CodegenVariable(storage=self._global,name='dbg0',ctype=itype, + # typegen=typegen, ptr_restrict=True,ptr=True,const=False,add_impl_const=True) + # kargs['dbg1'] = CodegenVariable(storage=self._global,name='dbg1',ctype=itype, + # typegen=typegen, ptr_restrict=True,ptr=True,const=False,add_impl_const=True) kargs['position_mesh_info'] = kernel_reqs['MeshInfoStruct'].build_codegen_variable( - const=True, name='position_mesh_info') + const=True, name='position_mesh_info') if is_inplace: for i in xrange(nfields): kargs['S{}_inout_mesh_info'.format(i)] = \ - kernel_reqs['MeshInfoStruct'].build_codegen_variable( - const=True, name='S{}_inout_mesh_info'.format(i), nl=True) + kernel_reqs['MeshInfoStruct'].build_codegen_variable( + const=True, name='S{}_inout_mesh_info'.format(i), nl=True) else: for i in xrange(nfields): kargs['S{}_in_mesh_info'.format(i)] = \ - kernel_reqs['MeshInfoStruct'].build_codegen_variable( - const=True, name='S{}_in_mesh_info'.format(i), nl=True) + kernel_reqs['MeshInfoStruct'].build_codegen_variable( + const=True, name='S{}_in_mesh_info'.format(i), nl=True) for i in xrange(nfields): kargs['S{}_out_mesh_info'.format(i)] = \ - kernel_reqs['MeshInfoStruct'].build_codegen_variable( - const=True, name='S{}_out_mesh_info'.format(i), nl=True) + kernel_reqs['MeshInfoStruct'].build_codegen_variable( + const=True, name='S{}_out_mesh_info'.format(i), nl=True) if not local_size_known: - kargs['buffer'] = CodegenVariable(storage=self._local, ctype=ftype, - add_impl_const=True, name='buffer', ptr=True, ptr_restrict=True, - typegen=typegen, nl=False) + kargs['buffer'] = CodegenVariable(storage=self._local, ctype=ftype, + add_impl_const=True, name='buffer', ptr=True, ptr_restrict=True, + typegen=typegen, nl=False) return kargs def gencode(self): - s = self + s = self tg = s.typegen - dim = s.work_dim - itype = s.itype - ftype = s.ftype - vitype = s.vitype - vftype = s.vftype - sboundary = s.sboundary - nparticles = s.nparticles - nscalars = s.nscalars - nfields = s.nfields - min_ghosts = s.min_ghosts - is_inplace = s.is_inplace + dim = s.work_dim + itype = s.itype + ftype = s.ftype + vitype = s.vitype + vftype = s.vftype + sboundary = s.sboundary + nparticles = s.nparticles + nscalars = s.nscalars + nfields = s.nfields + min_ghosts = s.min_ghosts + is_inplace = s.is_inplace use_atomics = 
s.use_atomics - work_dim = s.work_dim - debug_mode = s.debug_mode + work_dim = s.work_dim + debug_mode = s.debug_mode tuning_mode = s.tuning_mode use_short_circuit = s.use_short_circuit @@ -365,127 +366,128 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): local_size_known = s.local_size_known - global_id = s.vars['global_id'] - local_id = s.vars['local_id'] - group_id = s.vars['group_id'] + global_id = s.vars['global_id'] + local_id = s.vars['local_id'] + group_id = s.vars['group_id'] - global_index = s.vars['global_index'] - local_index = s.vars['local_index'] + global_index = s.vars['global_index'] + local_index = s.vars['local_index'] - global_size = s.vars['global_size'] - local_size = s.vars['local_size'] + global_size = s.vars['global_size'] + local_size = s.vars['local_size'] - #if debug_mode: - #dbg0 = s.vars['dbg0'] - #dbg1 = s.vars['dbg1'] + # if debug_mode: + #dbg0 = s.vars['dbg0'] + #dbg1 = s.vars['dbg1'] - lb = '[' if (nparticles>1) else '' - rb = ']' if (nparticles>1) else '' + lb = '[' if (nparticles > 1) else '' + rb = ']' if (nparticles > 1) else '' vnf = '{}{}{}'.format(lb, ', '.join('%2.2f' for _ in xrange(nparticles)), rb) vni = '{}{}{}'.format(lb, ', '.join('%i' for _ in xrange(nparticles)), rb) - expand_printf_vector = lambda x: str(x) if (nparticles==1) else ','.join( - '({}).s{}'.format(x, '0123456789abcdef'[i]) - if isinstance(x, str) else x[i] for i in xrange(nparticles)) + + def expand_printf_vector(x): return str(x) if (nparticles == 1) else ','.join( + '({}).s{}'.format(x, '0123456789abcdef'[i]) + if isinstance(x, str) else x[i] for i in xrange(nparticles)) epv = expand_printf_vector position_mesh_info = s.vars['position_mesh_info'] if is_inplace: - scalars_mesh_info_in = [s.vars['S{}_inout_mesh_info'.format(i)] for i in xrange(nfields)] + scalars_mesh_info_in = [ + s.vars['S{}_inout_mesh_info'.format(i)] for i in xrange(nfields)] scalars_mesh_info_out = scalars_mesh_info_in else: - scalars_mesh_info_in = [s.vars['S{}_in_mesh_info'.format(i)] for i in xrange(nfields)] - scalars_mesh_info_out = [s.vars['S{}_out_mesh_info'.format(i)] for i in xrange(nfields)] + scalars_mesh_info_in = [s.vars['S{}_in_mesh_info'.format(i)] for i in xrange(nfields)] + scalars_mesh_info_out = [s.vars['S{}_out_mesh_info'.format(i)] for i in xrange(nfields)] - position_base = s.vars['position_base'] - position_offset = s.vars['position_offset'] + position_base = s.vars['position_base'] + position_offset = s.vars['position_offset'] position_strides = s.vars['position_strides'] position = s.position - scalars_data_in = s.scalars_data_in + scalars_data_in = s.scalars_data_in scalars_data_out = s.scalars_data_out - scalars_strides_in = s.scalars_strides_in + scalars_strides_in = s.scalars_strides_in scalars_strides_out = s.scalars_strides_out - compute_grid_size = position_mesh_info['local_mesh']['compute_resolution'].view( - 'compute_grid_size',slice(None,work_dim),const=True) + compute_grid_size = position_mesh_info['local_mesh']['compute_resolution'].view( + 'compute_grid_size', slice(None, work_dim), const=True) position_grid_size = position_mesh_info['local_mesh']['resolution'].view( - 'pos_grid_size', slice(0,work_dim), const=True) + 'pos_grid_size', slice(0, work_dim), const=True) position_grid_ghosts = position_mesh_info['ghosts'].view( - 'pos_grid_ghosts', slice(0,work_dim), const=True) + 'pos_grid_ghosts', slice(0, work_dim), const=True) position_global_id = CodegenVectorClBuiltin('pos_gid', itype, work_dim, typegen=tg) - dx = position_mesh_info['dx'].view('dx', 
slice(0,1), const=True) - inv_dx = position_mesh_info['inv_dx'].view('inv_dx', slice(0,1), const=True) - xmin = position_mesh_info['local_mesh']['xmin'].view('xmin', slice(0,1), const=True) + dx = position_mesh_info['dx'].view('dx', slice(0, 1), const=True) + inv_dx = position_mesh_info['inv_dx'].view('inv_dx', slice(0, 1), const=True) + xmin = position_mesh_info['local_mesh']['xmin'].view('xmin', slice(0, 1), const=True) if is_inplace: scalars_in_grid_size = tuple(smi['local_mesh']['resolution'].view( - 'S{}_inout_grid_size'.format(i), slice(0,work_dim), const=True) - for (i,smi) in enumerate(scalars_mesh_info_out)) + 'S{}_inout_grid_size'.format(i), slice(0, work_dim), const=True) + for (i, smi) in enumerate(scalars_mesh_info_out)) scalars_in_grid_ghosts = tuple(smi['ghosts'].view( - 'S{}_inout_grid_ghosts'.format(i),slice(0,work_dim), const=True) - for (i,smi) in enumerate(scalars_mesh_info_out)) + 'S{}_inout_grid_ghosts'.format(i), slice(0, work_dim), const=True) + for (i, smi) in enumerate(scalars_mesh_info_out)) scalars_in_global_id = tuple(CodegenVectorClBuiltin('S{}_inout_gid'.format(i), - itype, work_dim, typegen=tg) for i in xrange(nfields)) + itype, work_dim, typegen=tg) for i in xrange(nfields)) - scalars_data_out_grid_size = scalars_in_grid_size + scalars_data_out_grid_size = scalars_in_grid_size scalars_data_out_grid_ghosts = scalars_in_grid_ghosts - scalars_data_out_global_id = scalars_in_global_id + scalars_data_out_global_id = scalars_in_global_id grid_ghosts = (position_grid_ghosts,) + scalars_in_grid_ghosts - grid_sizes = (position_grid_size,) + scalars_in_grid_size - global_ids = (position_global_id,) + scalars_in_global_id + grid_sizes = (position_grid_size,) + scalars_in_grid_size + global_ids = (position_global_id,) + scalars_in_global_id else: scalars_in_grid_size = tuple(smi['local_mesh']['resolution'].view( - 'S{}_in_grid_size'.format(i), slice(0,work_dim), const=True) - for (i,smi) in enumerate(scalars_mesh_info_in)) + 'S{}_in_grid_size'.format(i), slice(0, work_dim), const=True) + for (i, smi) in enumerate(scalars_mesh_info_in)) scalars_data_out_grid_size = tuple(smi['local_mesh']['resolution'].view( - 'S{}_out_grid_size'.format(i), slice(0,work_dim), const=True) - for (i,smi) in enumerate(scalars_mesh_info_out)) + 'S{}_out_grid_size'.format(i), slice(0, work_dim), const=True) + for (i, smi) in enumerate(scalars_mesh_info_out)) scalars_in_grid_ghosts = tuple(smi['ghosts'].view('S{}_in_grid_ghosts'.format(i), - slice(0,work_dim), const=True) for (i,smi) in enumerate(scalars_mesh_info_in)) + slice(0, work_dim), const=True) for (i, smi) in enumerate(scalars_mesh_info_in)) scalars_data_out_grid_ghosts = tuple(smi['ghosts'].view('S{}_out_grid_ghosts'.format(i), - slice(0,work_dim), const=True) for (i,smi) in enumerate(scalars_mesh_info_out)) + slice(0, work_dim), const=True) for (i, smi) in enumerate(scalars_mesh_info_out)) scalars_in_global_id = tuple(CodegenVectorClBuiltin('S{}_in_gid'.format(i), - itype, work_dim, typegen=tg) for i in xrange(nfields)) + itype, work_dim, typegen=tg) for i in xrange(nfields)) scalars_data_out_global_id = tuple(CodegenVectorClBuiltin('S{}_out_gid'.format(i), - itype, work_dim, typegen=tg) for i in xrange(nfields)) + itype, work_dim, typegen=tg) for i in xrange(nfields)) grid_ghosts = (position_grid_ghosts,) + scalars_in_grid_ghosts + \ - scalars_data_out_grid_ghosts - grid_sizes = (position_grid_size,) + scalars_in_grid_size + scalars_data_out_grid_size - global_ids = (position_global_id,) + scalars_in_global_id + 
scalars_data_out_global_id + scalars_data_out_grid_ghosts + grid_sizes = (position_grid_size,) + scalars_in_grid_size + scalars_data_out_grid_size + global_ids = (position_global_id,) + scalars_in_global_id + scalars_data_out_global_id s.update_vars(position=position, - inv_dx=inv_dx, - position_grid_ghosts=position_grid_ghosts, compute_grid_size=compute_grid_size) - s.update_vars(**dict((sij.name,sij) for si in scalars_data_in for sij in si)) + inv_dx=inv_dx, + position_grid_ghosts=position_grid_ghosts, compute_grid_size=compute_grid_size) + s.update_vars(**dict((sij.name, sij) for si in scalars_data_in for sij in si)) if not is_inplace: - s.update_vars(**dict((sij.name,sij) for si in scalars_data_out for sij in si)) - + s.update_vars(**dict((sij.name, sij) for si in scalars_data_out for sij in si)) npart = CodegenVariable(name='nparticles', ctype=itype, typegen=tg, value=nparticles) cache_ghosts = CodegenVariable('cache_ghosts', itype, typegen=tg, - value=min_ghosts, - symbolic_mode=symbolic_mode) - cache_width = CodegenVariable('cache_width', itype, typegen=tg, - init='{}*{} + 2*{}'.format(npart, local_size[0], cache_ghosts)) + value=min_ghosts, + symbolic_mode=symbolic_mode) + cache_width = CodegenVariable('cache_width', itype, typegen=tg, + init='{}*{} + 2*{}'.format(npart, local_size[0], cache_ghosts)) local_work = CodegenVariable('lwork', 'int', tg, const=True, - init='{}*{}'.format(nparticles,local_size[0])) + init='{}*{}'.format(nparticles, local_size[0])) - local_offset = CodegenVariable('local_offset',itype,tg) - line_offset = CodegenVariable('line_offset',itype,tg) - particle_offset = CodegenVariable('particle_offset',itype,tg) + local_offset = CodegenVariable('local_offset', itype, tg) + line_offset = CodegenVariable('line_offset', itype, tg) + particle_offset = CodegenVariable('particle_offset', itype, tg) def _line_init(base_ptr, global_id, grid_strides): - _id='' + _id = '' if work_dim > 1: _id += '$ + ({} $* {})'.format(global_id[work_dim-1], grid_strides[work_dim-1]) for i in xrange(work_dim-2, 0, -1): @@ -494,16 +496,16 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): return '{}{}'.format(base_ptr, _id) line_position = position.newvar('line_position', - init=_line_init(position, position_global_id, position_strides)) + init=_line_init(position, position_global_id, position_strides)) - line_scalars_in = [] + line_scalars_in = [] line_scalars_data_out = [] for (Si, Si_global_id, Si_grid_strides) in \ zip(scalars_data_in, scalars_in_global_id, scalars_strides_in): Li = [] for (Sij, Sij_strides) in zip(Si, Si_grid_strides): lij = Sij.newvar('line_{}'.format(Sij.name), - init=_line_init(Sij, Si_global_id, Sij_strides)) + init=_line_init(Sij, Si_global_id, Sij_strides)) Li.append(lij) line_scalars_in.append(tuple(Li)) for (Si, Si_global_id, Si_grid_strides) in \ @@ -511,10 +513,10 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): Li = [] for (Sij, Sij_strides) in zip(Si, Si_grid_strides): lij = Sij.newvar('line_{}'.format(Sij.name), - init=_line_init(Sij, Si_global_id, Sij_strides)) + init=_line_init(Sij, Si_global_id, Sij_strides)) Li.append(lij) line_scalars_data_out.append(tuple(Li)) - line_scalars_in = tuple(line_scalars_in) + line_scalars_in = tuple(line_scalars_in) line_scalars_data_out = tuple(line_scalars_data_out) line_vars = ((line_position,),) + line_scalars_in @@ -526,85 +528,85 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): if local_size_known: L = s.known_vars['local_size'] for i in xrange(nfields): - Si = [] + Si = 
[] BSi = [] for j in xrange(group_scalars[i]): - Sij = CodegenArray(name='S{}_{}'.format(i,j), - dim=1, ctype=ftype, typegen=tg, - shape=(nparticles*L[0]+2*min_ghosts,), storage=self._local) + Sij = CodegenArray(name='S{}_{}'.format(i, j), + dim=1, ctype=ftype, typegen=tg, + shape=(nparticles*L[0]+2*min_ghosts,), storage=self._local) Si.append(Sij) cached_scalars.append(tuple(Si)) else: buf = self.vars['buffer'] - k=0 + k = 0 for i in xrange(nfields): - Si = [] + Si = [] BSi = [] for j in xrange(group_scalars[i]): - Sij = CodegenVariable(name='S{}_{}'.format(i,j),ctype=ftype,typegen=tg, - ptr_restrict=True, ptr=True, storage=self._local, - ptr_const=True, - init='{} + {}*{}'.format(buf,k,cache_width)) + Sij = CodegenVariable(name='S{}_{}'.format(i, j), ctype=ftype, typegen=tg, + ptr_restrict=True, ptr=True, storage=self._local, + ptr_const=True, + init='{} + {}*{}'.format(buf, k, cache_width)) Si.append(Sij) - k+=1 + k += 1 cached_scalars.append(tuple(Si)) - cache_scalars = tuple(cached_scalars) + cache_scalars = tuple(cached_scalars) boundary_scalars = tuple(boundary_scalars) - pos = CodegenVectorClBuiltin('p', ftype, nparticles, tg) - tuning_pos = CodegenVectorClBuiltin('tp', ftype, nparticles, tg) + pos = CodegenVectorClBuiltin('p', ftype, nparticles, tg) + tuning_pos = CodegenVectorClBuiltin('tp', ftype, nparticles, tg) scalars = [] for i in xrange(nfields): si = [] for j in xrange(group_scalars[i]): - sij = CodegenVectorClBuiltin('s{}_{}'.format(i,j), ftype, nparticles, tg) + sij = CodegenVectorClBuiltin('s{}_{}'.format(i, j), ftype, nparticles, tg) si.append(sij) scalars.append(tuple(si)) scalars = tuple(scalars) - - vzero = CodegenVectorClBuiltin('vzero', ftype, nparticles, tg, - value=np.zeros(shape=(nparticles,), dtype=np.float64)) + vzero = CodegenVectorClBuiltin('vzero', ftype, nparticles, tg, + value=np.zeros(shape=(nparticles,), dtype=np.float64)) kmax = CodegenVariable('kmax', itype, tg, const=True, - init='(({}+{lwork}-1)/{lwork})'.format( - compute_grid_size[0], lwork=local_work)) + init='(({}+{lwork}-1)/{lwork})'.format( + compute_grid_size[0], lwork=local_work)) - loopvars = 'kji' - first = CodegenVariable('first', 'bool', tg, const=True, init='({}==0)'.format(loopvars[0])) - last = CodegenVariable('last', 'bool', tg, const=True, init='({}=={}-1)'.format(loopvars[0],kmax)) - active = CodegenVariable('active','bool', tg, const=True) - last_active = CodegenVariable('last_active','bool', tg, const=True) + loopvars = 'kji' + first = CodegenVariable('first', 'bool', tg, const=True, init='({}==0)'.format(loopvars[0])) + last = CodegenVariable('last', 'bool', tg, const=True, + init='({}=={}-1)'.format(loopvars[0], kmax)) + active = CodegenVariable('active', 'bool', tg, const=True) + last_active = CodegenVariable('last_active', 'bool', tg, const=True) last_particle = CodegenVariable('last_particle', itype, tg, const=True, - init='{} - {}*({}-1)*{}'.format(compute_grid_size[0], nparticles, kmax, local_size[0])) + init='{} - {}*({}-1)*{}'.format(compute_grid_size[0], nparticles, kmax, local_size[0])) @contextlib.contextmanager def _work_iterate_(i): try: - if i==0: + if i == 0: fval = '0' gsize = '1' N = kmax - ghosts = '({}-{})'.format(position_grid_ghosts[i],cache_ghosts) + ghosts = '({}-{})'.format(position_grid_ghosts[i], cache_ghosts) else: fval = global_id.fval(i) gsize = global_size[i] - N = '{Sx}'.format(Sx=compute_grid_size[i]) + N = '{Sx}'.format(Sx=compute_grid_size[i]) ghosts = position_grid_ghosts[i] with s._for_('int {i}={fval}; {i}<{N}; {i}+={gsize}'.format( - 
i=loopvars[i], fval=fval, gsize=gsize,N=N), - unroll=(i==0) and unroll_loops) as ctx: + i=loopvars[i], fval=fval, gsize=gsize, N=N), + unroll=(i == 0) and unroll_loops) as ctx: - if i==0: + if i == 0: with s._align_() as al: line_offset.declare(al, align=True, const=True, init='{}*{}'.format( loopvars[0], local_work)) local_offset.declare(al, align=True, const=True, init='{}*{}'.format( - nparticles,local_id[0])) + nparticles, local_id[0])) particle_offset.declare(al, align=True, const=True, init='{} + {}'.format( line_offset, local_offset)) s.jumpline() @@ -612,19 +614,19 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): first.declare(al, align=True) last.declare(al, align=True) active.declare(al, init='({} < {})'.format(particle_offset, - compute_grid_size[0]), align=True) + compute_grid_size[0]), align=True) last_active.declare(al, init='{} && ({}+{}-1 >= {})'.format(active, particle_offset, - nparticles, compute_grid_size[0]), align=True) - elif i==1: + nparticles, compute_grid_size[0]), align=True) + elif i == 1: kmax.declare(s) last_particle.declare(s) s.jumpline() - if i>0: + if i > 0: with s._align_() as al: - for field_gid, field_ghosts in zip(global_ids, grid_ghosts) : - al.append('{} $= {} $+ {};'.format(field_gid[i], - loopvars[i], field_ghosts[i])) + for field_gid, field_ghosts in zip(global_ids, grid_ghosts): + al.append('{} $= {} $+ {};'.format(field_gid[i], + loopvars[i], field_ghosts[i])) al.jumpline() yield ctx except: @@ -651,7 +653,7 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): def if_first_or_last_thread_active(): #cond= '{} || {}'.format(last_active, first) - cond= '{}'.format(last_active) + cond = '{}'.format(last_active) return if_last_thread_active(cond) with s._kernel_(): @@ -683,15 +685,15 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): s.decl_vars(*global_ids) with s._align_() as al: - _pos=(pos,) + _pos = (pos,) if tuning_mode: - _pos+=(tuning_pos,) - s.decl_vars(*(_pos+tuple(sij for si in scalars for sij in si)), align=True, codegen=al) + _pos += (tuning_pos,) + s.decl_vars(*(_pos+tuple(sij for si in scalars for sij in si)), + align=True, codegen=al) s.jumpline() - - nested_loops = [_work_iterate_(i) for i in xrange(dim-1,-1,-1)] - if work_dim==1: + nested_loops = [_work_iterate_(i) for i in xrange(dim-1, -1, -1)] + if work_dim == 1: kmax.declare(s) last_particle.declare(s) with contextlib.nested(*nested_loops): @@ -699,38 +701,39 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): s.comment('Compute global offsets and line pointers') with s._align_() as al: position_global_id.affect(al, i=0, align=True, - init='{} + {}'.format(particle_offset, position_grid_ghosts[0])) + init='{} + {}'.format(particle_offset, position_grid_ghosts[0])) if not is_inplace: for _gid, _ghosts in zip(scalars_in_global_id, scalars_in_grid_ghosts): _gid.affect(al, i=0, align=True, - init='{} + {}'.format(particle_offset, _ghosts[0])) + init='{} + {}'.format(particle_offset, _ghosts[0])) for _gid, _ghosts in zip(scalars_data_out_global_id, scalars_data_out_grid_ghosts): _gid.affect(al, i=0, align=True, - init='{} + {} - {}'.format(particle_offset, _ghosts[0], - cache_ghosts)) + init='{} + {} - {}'.format(particle_offset, _ghosts[0], + cache_ghosts)) s.jumpline() s.decl_aligned_vars(*(lvij for lvi in line_vars for lvij in lvi)) s.comment('Get back left cache from right cache') with s._for_('int l={}; l<2*{}; l+={}'.format(local_id[0], - cache_ghosts, local_size[0]), unroll=unroll_loops): + cache_ghosts, local_size[0]), 
unroll=unroll_loops): with s._if_('{}'.format(first)): - with s._align_() as al: - for csi in cached_scalars: - for csij in csi: - csij.affect(al, align=True, - i='l', - init=tg.dump(+0.0)) + with s._align_() as al: + for csi in cached_scalars: + for csij in csi: + csij.affect(al, align=True, + i='l', + init=tg.dump(+0.0)) with s._else_(): - end_id='{}+{}'.format(local_work, 'l') + end_id = '{}+{}'.format(local_work, 'l') if debug_mode: - s.append('printf("%i loaded back %2.2f from position %i.\\n", {}, {}, {});'.format(local_id[0], cached_scalars[0][0][end_id], end_id)) + s.append('printf("%i loaded back %2.2f from position %i.\\n", {}, {}, {});'.format( + local_id[0], cached_scalars[0][0][end_id], end_id)) with s._align_() as al: for csi in cached_scalars: - for csij in csi: + for csij in csi: csij.affect(al, align=True, - i='l', - init=csij[end_id]) + i='l', + init=csij[end_id]) s.barrier(_local=True) s.jumpline() @@ -741,14 +744,17 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): s.append(s.vstore(nparticles, csij, offset, vzero)) s.jumpline() - s.comment('Load position and scalars at current index, {} particles at a time.'.format(nparticles)) - for al, active_cond, k in if_last_thread_active(): - load = self.vload(1, line_position, '{}+{}'.format(particle_offset,k), align=True) + s.comment( + 'Load position and scalars at current index, {} particles at a time.'.format(nparticles)) + for al, active_cond, k in if_last_thread_active(): + load = self.vload(1, line_position, + '{}+{}'.format(particle_offset, k), align=True) if use_short_circuit: - code = '{} $= ({} $? {} $: {});'.format(pos[k], active_cond, load, tg.dump(0.0)) + code = '{} $= ({} $? {} $: {});'.format( + pos[k], active_cond, load, tg.dump(0.0)) else: code = 'if ({}) {{ {posk} $= {}; $}} $else {{ {posk} $= {}; $}}'.format(active_cond, - load, tg.dump(0.0), posk=pos[k]) + load, tg.dump(0.0), posk=pos[k]) al.append(code) if is_inplace: _id = '{}+{}+{}'.format(particle_offset, cache_ghosts, k) @@ -758,15 +764,16 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): for sij, line_sij in zip(si, line_si): load = self.vload(1, line_sij, _id, align=True) if use_short_circuit: - code = '{} $= ({} $? {} $: {});'.format(sij[k], active_cond, load, tg.dump(0.0)) + code = '{} $= ({} $? 
{} $: {});'.format( + sij[k], active_cond, load, tg.dump(0.0)) else: code = 'if ({}) {{ {sijk} $= {}; $}} $else {{ {sijk} $= {}; $}}'.format(active_cond, - load, tg.dump(0.0), sijk=sij[k]) + load, tg.dump(0.0), sijk=sij[k]) al.append(code) with if_thread_active(): with s._align_() as al: pos.affect(al, align=True, - init=self.vload(nparticles, line_position, particle_offset, align=True)) + init=self.vload(nparticles, line_position, particle_offset, align=True)) if is_inplace: _id = '{}+{}'.format(particle_offset, cache_ghosts) @@ -775,7 +782,7 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): for si, line_si in zip(scalars, line_scalars_in): for sij, line_sij in zip(si, line_si): sij.affect(al, align=True, - init=self.vload(nparticles, line_sij, _id, align=True)) + init=self.vload(nparticles, line_sij, _id, align=True)) with s._else_(): for k in xrange(nparticles): code = '{} = NAN;'.format(pos[k]) @@ -790,8 +797,8 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): if tuning_mode: code = '{} = max(min({pos}, ({pid}+{one})*{dx}), ({pid}-{one})*{dx});'.format( - tuning_pos, pos=pos, dx=dx, - pid=particle_offset, one=tg.dump(1.0)) + tuning_pos, pos=pos, dx=dx, + pid=particle_offset, one=tg.dump(1.0)) s.append(code) if debug_mode: @@ -799,37 +806,38 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): s.append('printf("\\n");') s.append('printf("\\nGLOBAL SCALAR VALUES\\n");') with s._for_('int ii=0; ii<{}; ii++'.format(compute_grid_size[0])): - s.append('printf("%2.2f, ", {}[cache_ghosts+ii]);'.format(line_scalars_in[0][0])) + s.append( + 'printf("%2.2f, ", {}[cache_ghosts+ii]);'.format(line_scalars_in[0][0])) s.append('printf("\\n\\n");') with s._ordered_wi_execution_(barrier=True): - s.append('printf("lid.x=%i, k=%i/%i, line_offset=%i, local_offset=%i, poffset=%i, first=%i, last=%i, active=%i, last_active=%i, p={vnf}, s0={vnf}\\n", {},k,kmax,line_offset,local_offset,particle_offset,first,last,active,last_active,{},{});'.format(local_id[0], epv(pos), epv(scalars[0][0]), vnf=vnf)) + s.append('printf("lid.x=%i, k=%i/%i, line_offset=%i, local_offset=%i, poffset=%i, first=%i, last=%i, active=%i, last_active=%i, p={vnf}, s0={vnf}\\n", {},k,kmax,line_offset,local_offset,particle_offset,first,last,active,last_active,{},{});'.format( + local_id[0], epv(pos), epv(scalars[0][0]), vnf=vnf)) s.barrier(_local=True) - + with s._first_wi_execution_(): s.append('printf("\\nLEFT SCALAR CACHE (BEFORE REMESH)\\n");') with s._for_('int ii=0; ii<cache_width; ii++'): s.append('printf("%2.2f, ", S0_0[ii]);') s.append('printf("\\n");') - s.comment('Remesh scalars in cache.') with s._block_(): remesh = s.reqs['remesh'] remesh_kargs = { - 'dx':dx, - 'inv_dx':inv_dx, + 'dx': dx, + 'inv_dx': inv_dx, 'cache_width': cache_width, 'cache_ghosts': cache_ghosts, 'active': active, 'line_offset': line_offset } - k=0 + k = 0 for ci in cached_scalars: for cij in ci: remesh_kargs['S{}'.format(k)] = cij - k+=1 + k += 1 if use_atomics: # with atomic summation in cache, we can remesh everything at a time @@ -837,11 +845,11 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): remesh_kargs['p'] = tuning_pos else: remesh_kargs['p'] = pos - k=0 + k = 0 for si in scalars: for sij in si: remesh_kargs['s{}'.format(k)] = sij - k+=1 + k += 1 call = remesh(**remesh_kargs) s.append('{};'.format(call)) else: @@ -849,16 +857,17 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): for p in xrange(nparticles): if debug_mode: with s._first_wi_execution_(): - s.append('printf("\\nREMESHING 
PARTICLES {}*k+{}:\\n");'.format(nparticles, p)) + s.append( + 'printf("\\nREMESHING PARTICLES {}*k+{}:\\n");'.format(nparticles, p)) if tuning_mode: remesh_kargs['p'] = tuning_pos[p] else: remesh_kargs['p'] = pos[p] - k=0 + k = 0 for si in scalars: for sij in si: remesh_kargs['s{}'.format(k)] = sij[p] - k+=1 + k += 1 call = remesh(**remesh_kargs) s.append('{};'.format(call)) s.barrier(_local=True, _global=True) @@ -879,21 +888,22 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): for al, active_cond, k in if_first_or_last_thread_active(): for si, li in zip(cached_scalars, line_scalars_data_out): for sij, lij in zip(si, li): - load = self.vload(1, sij, '{}+{}'.format(local_offset, k)) + load = self.vload(1, sij, '{}+{}'.format(local_offset, k)) _id = '{}+{}'.format(particle_offset, k) - store = self.vstore(1, lij, _id, load, align=True, jmp=True, suppress_semicolon=True) + store = self.vstore(1, lij, _id, load, align=True, + jmp=True, suppress_semicolon=True) if use_short_circuit: code = '{} $&& (({}),true)'.format(active_cond, store) if debug_mode: code += ' && (printf("last_active %i wrote %2.2f from cache position %i to global id %i.\\n", {},{},{},{}));'.format( - local_id[0], load, '{}+{}'.format(local_offset,k), _id) + local_id[0], load, '{}+{}'.format(local_offset, k), _id) else: code += ';' else: code = 'if ({}) ${{ {}; '.format(active_cond, store) if debug_mode: code += 'printf("last_active %i wrote %2.2f from cache position %i to global id %i.\\n", {},{},{},{});'.format( - local_id[0], load, '{}+{}'.format(local_offset,k), _id) + local_id[0], load, '{}+{}'.format(local_offset, k), _id) code += '}' al.append(code) with if_thread_active(not_first=False): @@ -906,73 +916,78 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): vnf=vnf)) for ci, li in zip(cached_scalars, line_scalars_data_out): for cij, lij in zip(ci, li): - load = self.vload(nparticles, cij, '{}'.format(local_offset)) + load = self.vload(nparticles, cij, '{}'.format(local_offset)) _id = particle_offset - store = self.vstore(nparticles, lij, _id, load, align=True, jmp=True) + store = self.vstore(nparticles, lij, _id, + load, align=True, jmp=True) al.append(store) s.jumpline() with s._if_('{}'.format(last)): with s._for_('int l={}; l<2*{}; l+={}'.format(local_id[0], - cache_ghosts, local_size[0]), unroll=unroll_loops): + cache_ghosts, local_size[0]), unroll=unroll_loops): with s._align_() as al: _gid = '{}+{}+{}'.format(line_offset, last_particle, 'l') for ci, li in zip(cached_scalars, line_scalars_data_out): - for cij, lij in zip(ci,li): - init=cij['{}+{}'.format(last_particle, 'l')] + for cij, lij in zip(ci, li): + init = cij['{}+{}'.format(last_particle, 'l')] lij.affect(i=_gid, init=init, align=True, codegen=al) if debug_mode: with s._ordered_wi_execution_(barrier=False): s.append('printf("%i initiated last write from cache position %i+%i=%i to global id %i+%i+%i=%i with value %2.2f.\\n", {}, {},{},{}, {},{},{},{}, {});'.format( - local_id[0], last_particle, 'l', '{}+l'.format(last_particle), - line_offset, last_particle, 'l', '{}+{}+l'.format(line_offset, last_particle), + local_id[0], last_particle, 'l', '{}+l'.format( + last_particle), + line_offset, last_particle, 'l', '{}+{}+l'.format( + line_offset, last_particle), 'S0_0[{}+l]'.format(last_particle))) s.barrier(_local=True, _global=True) + if __name__ == '__main__': from hysop.backend.device.opencl import cl from hysop.backend.device.codegen.base.test import _test_mesh_info, _test_typegen from hysop.numerics.remesh.remesh import 
RemeshKernel - kernel = RemeshKernel(4,2, split_polys=False) + kernel = RemeshKernel(4, 2, split_polys=False) - work_dim=3 - ghosts=(0,0,0) - sresolution =(1024,512,256) - local_size = (128,1,1) - global_size = (16050,55,440) + work_dim = 3 + ghosts = (0, 0, 0) + sresolution = (1024, 512, 256) + local_size = (128, 1, 1) + global_size = (16050, 55, 440) tg = _test_typegen('float', 'hex') - (_,smesh_info) = _test_mesh_info('scalars_mesh_info',tg,work_dim,ghosts,sresolution) - (_,pmesh_info) = _test_mesh_info('position_mesh_info',tg,work_dim,ghosts,sresolution) + (_, smesh_info) = _test_mesh_info('scalars_mesh_info', tg, work_dim, ghosts, sresolution) + (_, pmesh_info) = _test_mesh_info('position_mesh_info', tg, work_dim, ghosts, sresolution) scalar_cfl = 1.5 dak = DirectionalRemeshKernelGenerator(typegen=tg, ftype=tg.fbtype, - work_dim=work_dim, - nparticles=4, - nscalars=2, - remesh_kernel=kernel, - scalar_cfl=scalar_cfl, - use_atomics=False, - is_inplace=True, - symbolic_mode=False, - debug_mode=False, - tuning_mode=False, - sboundary=(BoundaryCondition.NONE, BoundaryCondition.NONE), - known_vars=dict( - S0_inout_mesh_info=smesh_info, - S1_inout_mesh_info=smesh_info, - S0_in_mesh_info=smesh_info, - S1_in_mesh_info=smesh_info, - S0_out_mesh_info=smesh_info, - S1_out_mesh_info=smesh_info, - position_mesh_info=pmesh_info, - local_size=local_size[:work_dim], - global_size=global_size[:work_dim] - ) - ) + work_dim=work_dim, + nparticles=4, + nscalars=2, + remesh_kernel=kernel, + scalar_cfl=scalar_cfl, + use_atomics=False, + is_inplace=True, + symbolic_mode=False, + debug_mode=False, + tuning_mode=False, + sboundary=(BoundaryCondition.NONE, + BoundaryCondition.NONE), + known_vars=dict( + S0_inout_mesh_info=smesh_info, + S1_inout_mesh_info=smesh_info, + S0_in_mesh_info=smesh_info, + S1_in_mesh_info=smesh_info, + S0_out_mesh_info=smesh_info, + S1_out_mesh_info=smesh_info, + position_mesh_info=pmesh_info, + local_size=local_size[:work_dim], + global_size=global_size[:work_dim] + ) + ) print 'scalars_out_min_gosts = {}'.format(dak.scalars_out_cache_ghosts(scalar_cfl, kernel)) print 'required cache: {}'.format(dak.required_workgroup_cache_size(local_size)) diff --git a/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py b/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py index aee89fd51eec7d32aa3a147e85fdaf8a78eb8911..4f92191c477f63c9b5f83ac01799983ec7cfb07a 100644 --- a/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py +++ b/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py @@ -1,10 +1,10 @@ - from hysop.deps import warnings from hysop.tools.numpywrappers import npw from hysop.tools.types import check_instance from hysop.tools.misc import upper_pow2_or_3 from hysop.constants import AutotunerFlags, BoundaryCondition from hysop.numerics.remesh.remesh import RemeshKernel +from hysop.numerics.remesh.kernel_generator import Kernel from hysop.fields.cartesian_discrete_field import CartesianDiscreteScalarFieldView from hysop.backend.device.codegen import CodeGeneratorWarning @@ -15,16 +15,17 @@ from hysop.backend.device.opencl.opencl_array import OpenClArray from hysop.backend.device.opencl.opencl_autotunable_kernel import OpenClAutotunableKernel from hysop.backend.device.kernel_autotuner import KernelGenerationError + class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): """Autotunable interface for directional remeshing kernel code generators.""" - def autotune(self, direction, scalar_cfl, - position, scalars_in, scalars_out, is_inplace, - 
remesh_kernel, remesh_criteria_eps, - force_atomics, relax_min_particles, - hardcode_arrays, **kwds): + def autotune(self, direction, scalar_cfl, + position, scalars_in, scalars_out, is_inplace, + remesh_kernel, remesh_criteria_eps, + force_atomics, relax_min_particles, + hardcode_arrays, **kwds): """Autotune this kernel with specified configuration. - + hardcode_arrays means that array offset and strides can be hardcoded into the kernels as constants. """ @@ -32,152 +33,153 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): check_instance(scalars_out, tuple, values=CartesianDiscreteScalarFieldView) precision = self.typegen.dtype - + dim = position.dim if not (1 <= dim <= 3): - msg='Dimension mismatch {}.'.format(dim) + msg = 'Dimension mismatch {}.'.format(dim) raise ValueError(msg) if not (0 <= direction < dim): - msg='Invalid direction {}.'.format(direction) + msg = 'Invalid direction {}.'.format(direction) raise ValueError(msg) if not issubclass(precision, npw.floating): - msg='Precision is not a npw.floating subtype, got {}.'.format(precision) + msg = 'Precision is not a npw.floating subtype, got {}.'.format(precision) raise TypeError(msg) - if not isinstance(scalar_cfl, float) or scalar_cfl<=0.0: - msg='Invalid scalar_cfl value {}.'.format(scalar_cfl) + if not isinstance(scalar_cfl, float) or scalar_cfl <= 0.0: + msg = 'Invalid scalar_cfl value {}.'.format(scalar_cfl) raise ValueError(msg) - if len(scalars_in)!=len(scalars_out): + if len(scalars_in) != len(scalars_out): raise ValueError('Unmatched scalars input/output.') - if not isinstance(remesh_kernel, RemeshKernel): - msg='Invalid remesh_kernel type {}.'.format(type(remesh_kernel)) + if not isinstance(remesh_kernel, RemeshKernel) and not isinstance(remesh_kernel, Kernel): + msg = 'Invalid remesh_kernel type {}.'.format(type(remesh_kernel)) raise TypeError(msg) if (remesh_criteria_eps is not None) and \ - (not isinstance(remesh_criteria_eps, float) or \ - (remesh_criteria_eps<0.0)): - msg='Invalid remesh_criteria_eps value {}.'.format(remesh_criteria_eps) + (not isinstance(remesh_criteria_eps, float) or + (remesh_criteria_eps < 0.0)): + msg = 'Invalid remesh_criteria_eps value {}.'.format(remesh_criteria_eps) raise ValueError(msg) if not isinstance(force_atomics, bool): - msg='Invalid force_atomics value {}, should be a boolean.' - msg=msg.format(force_atomics) + msg = 'Invalid force_atomics value {}, should be a boolean.' + msg = msg.format(force_atomics) raise ValueError(msg) if not isinstance(relax_min_particles, bool): - msg='Invalid relax_min_particles value {}, should be a boolean.' - msg=msg.format(relax_min_particles) + msg = 'Invalid relax_min_particles value {}, should be a boolean.' + msg = msg.format(relax_min_particles) raise ValueError(msg) if (force_atomics and relax_min_particles): - msg= 'Cannot relax min particles when force_atomics is set because ' - msg+='there is no minimum particles to be used when using atomic accumulators.' + msg = 'Cannot relax min particles when force_atomics is set because ' + msg += 'there is no minimum particles to be used when using atomic accumulators.' raise ValueError(msg) if (position.dtype != precision): - #TODO implement mixed precision kernels - msg='Array type for position {} do not match required operator precision {}.' - msg=msg.format(position.dtype, precision.__name__) + # TODO implement mixed precision kernels + msg = 'Array type for position {} do not match required operator precision {}.' 
+ msg = msg.format(position.dtype, precision.__name__) raise NotImplementedError(msg) - - pshape = position.compute_resolution - work_size = npw.asarray(pshape, dtype=npw.int32).copy() - work_dim = work_size.size + + pshape = position.compute_resolution + work_size = npw.asarray(pshape, dtype=npw.int32).copy() + work_dim = work_size.size group_scalars = tuple(dfield.nb_components for dfield in scalars_in) - nfields = len(group_scalars) - nscalars = sum(group_scalars) - ftype = clTools.dtype_to_ctype(precision) - + nfields = len(group_scalars) + nscalars = sum(group_scalars) + ftype = clTools.dtype_to_ctype(precision) + min_s_ghosts = DirectionalRemeshKernelGenerator.scalars_out_cache_ghosts(scalar_cfl, - remesh_kernel) - + remesh_kernel) + min_nparticles = 2 if relax_min_particles else int(2*npw.ceil(scalar_cfl)) - - assert (min_s_ghosts>=1) - assert (min_nparticles>=2) - + + assert (min_s_ghosts >= 1) + assert (min_nparticles >= 2) + name = DirectionalRemeshKernelGenerator.codegen_name(work_dim, remesh_kernel, - ftype, min_nparticles, nscalars, remesh_criteria_eps, False, is_inplace) + ftype, min_nparticles, nscalars, remesh_criteria_eps, False, is_inplace) for dsin, dsout in zip(scalars_in, scalars_out): if (dsin.nb_components != dsout.nb_components): - msg='Components mismatch between input field {} and output field {}, ' - msg+='got input={}, output={}, cannot remesh.' - msg=msg.format(dsin.name, dsout.name, dsin.nb_components, dsout.nb_components) + msg = 'Components mismatch between input field {} and output field {}, ' + msg += 'got input={}, output={}, cannot remesh.' + msg = msg.format(dsin.name, dsout.name, dsin.nb_components, dsout.nb_components) raise ValueError(msg) if (dsin.compute_resolution != pshape).any() or \ (dsout.compute_resolution != pshape).any(): - msg='Resolution mismatch between particles and scalars, ' - msg+='got input={}, output={} but pshape={}, cannot remesh.' - msg=msg.format(dsin.compute_resolution, dsout.compute_resolution, pshape) + msg = 'Resolution mismatch between particles and scalars, ' + msg += 'got input={}, output={} but pshape={}, cannot remesh.' + msg = msg.format(dsin.compute_resolution, dsout.compute_resolution, pshape) raise ValueError(msg) if (dsout.ghosts[-1] < min_s_ghosts): - msg= 'Given boundary condition implies minimum ghosts numbers to be at ' - msg+='least {} in remeshed direction for output scalar but only {} ghosts ' - msg+='are present in the grid for output scalar {}.' - msg=msg.format(min_s_ghosts, dsout.ghosts[-1], dsout.name) + msg = 'Given boundary condition implies minimum ghosts numbers to be at ' + msg += 'least {} in remeshed direction for output scalar but only {} ghosts ' + msg += 'are present in the grid for output scalar {}.' + msg = msg.format(min_s_ghosts, dsout.ghosts[-1], dsout.name) raise ValueError(msg) - if is_inplace and (dsin.dfield !=dsout.dfield): - msg='Remeshing inplace but input and output discrete Field differs ' - msg+='for {} and {}.'.format(dsin.name, dsout.name) + if is_inplace and (dsin.dfield != dsout.dfield): + msg = 'Remeshing inplace but input and output discrete Field differs ' + msg += 'for {} and {}.'.format(dsin.name, dsout.name) raise ValueError(msg) - if (dsin.dtype != precision) or (dsout.dtype!=precision): - #TODO implement mixed precision kernels - msg='Array types ({}={}, {}={}) do not match required operator precision {}.' 
- msg=msg.format(dsin.name, dsin.dtype, dsout.name, dsout.dtype, precision.__name__) + if (dsin.dtype != precision) or (dsout.dtype != precision): + # TODO implement mixed precision kernels + msg = 'Array types ({}={}, {}={}) do not match required operator precision {}.' + msg = msg.format(dsin.name, dsin.dtype, dsout.name, dsout.dtype, precision.__name__) raise NotImplementedError(msg) - + make_offset, offset_dtype = self.make_array_offset() - make_strides, strides_dtype = self.make_array_strides(position.dim, - hardcode_arrays=hardcode_arrays) - + make_strides, strides_dtype = self.make_array_strides(position.dim, + hardcode_arrays=hardcode_arrays) + kernel_args = {} known_args = {} isolation_params = {} mesh_info_vars = {} target_args = known_args if hardcode_arrays else kernel_args - kernel_args['position_base'] = position.sdata.base_data + kernel_args['position_base'] = position.sdata.base_data target_args['position_strides'] = make_strides(position.sdata.strides, position.dtype) - target_args['position_offset'] = make_offset(position.sdata.offset, position.dtype) + target_args['position_offset'] = make_offset(position.sdata.offset, position.dtype) mesh_info_vars['position_mesh_info'] = \ - self.mesh_info('position_mesh_info', position.mesh) - isolation_params['position_base'] = dict(count=position.npoints, - dtype=position.dtype, arg_value=position.sbuffer.get()) + self.mesh_info('position_mesh_info', position.mesh) + isolation_params['position_base'] = dict(count=position.npoints, + dtype=position.dtype, arg_value=position.sbuffer.get()) arg_index = 1 + 2*(1-hardcode_arrays) if is_inplace: - for (i,dsinout) in enumerate(scalars_in): + for (i, dsinout) in enumerate(scalars_in): mi = 'S{}_inout_mesh_info'.format(i) mesh_info_vars[mi] = self.mesh_info(mi, dsinout.mesh) for j in xrange(dsinout.nb_components): - prefix = 'S{}_{}_inout'.format(i,j) - kernel_args[prefix+'_base'] = dsinout.data[j].base_data - target_args[prefix+'_strides'] = make_strides(dsinout.data[j].strides, - dsinout.dtype) - target_args[prefix+'_offset'] = make_offset(dsinout.data[j].offset, - dsinout.dtype) - isolation_params[prefix+'_base'] = dict(count=dsinout.data[j].size, - dtype=dsinout.dtype, fill=10*i+j) + prefix = 'S{}_{}_inout'.format(i, j) + kernel_args[prefix+'_base'] = dsinout.data[j].base_data + target_args[prefix+'_strides'] = make_strides(dsinout.data[j].strides, + dsinout.dtype) + target_args[prefix+'_offset'] = make_offset(dsinout.data[j].offset, + dsinout.dtype) + isolation_params[prefix+'_base'] = dict(count=dsinout.data[j].size, + dtype=dsinout.dtype, fill=10*i+j) arg_index += 1 + 2*(1-hardcode_arrays) assert i == nfields-1 else: - for (i,dsin) in enumerate(scalars_in): + for (i, dsin) in enumerate(scalars_in): mi = 'S{}_in_mesh_info'.format(i) mesh_info_vars[mi] = self.mesh_info(mi, dsin.mesh) for j in xrange(dsin.nb_components): - prefix = 'S{}_{}_in'.format(i,j) - kernel_args[prefix+'_base'] = dsin.data[j].base_data + prefix = 'S{}_{}_in'.format(i, j) + kernel_args[prefix+'_base'] = dsin.data[j].base_data target_args[prefix+'_strides'] = make_strides(dsin.data[j].strides, dsin.dtype) - target_args[prefix+'_offset'] = make_offset(dsin.data[j].offset, dsin.dtype) - isolation_params[prefix+'_base'] = dict(count=dsin.data[j].size, - dtype=dsin.dtype, fill=10*i+j) + target_args[prefix+'_offset'] = make_offset(dsin.data[j].offset, dsin.dtype) + isolation_params[prefix+'_base'] = dict(count=dsin.data[j].size, + dtype=dsin.dtype, fill=10*i+j) arg_index += 1 + 2*(1-hardcode_arrays) assert i == nfields-1 
- for (i,dsout) in enumerate(scalars_out): + for (i, dsout) in enumerate(scalars_out): mi = 'S{}_out_mesh_info'.format(i) mesh_info_vars[mi] = self.mesh_info(mi, dsout.mesh) for j in xrange(dsout.nb_components): - prefix = 'S{}_{}_out'.format(i,j) - kernel_args[prefix+'_base'] = dsout.data[j].base_data - target_args[prefix+'_strides'] = make_strides(dsout.data[j].strides, dsout.dtype) - target_args[prefix+'_offset'] = make_offset(dsout.data[j].offset, dsout.dtype) - isolation_params[prefix+'_base'] = dict(count=dsout.data[j].size, - dtype=dsout.dtype, fill=0) + prefix = 'S{}_{}_out'.format(i, j) + kernel_args[prefix+'_base'] = dsout.data[j].base_data + target_args[prefix + + '_strides'] = make_strides(dsout.data[j].strides, dsout.dtype) + target_args[prefix+'_offset'] = make_offset(dsout.data[j].offset, dsout.dtype) + isolation_params[prefix+'_base'] = dict(count=dsout.data[j].size, + dtype=dsout.dtype, fill=0) arg_index += 1 + 2*(1-hardcode_arrays) assert i == nfields-1 assert len(kernel_args) == arg_index @@ -185,17 +187,16 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): assert arg_index == (1+2*(1-hardcode_arrays))*(1+(2-is_inplace)*nscalars) return super(OpenClAutotunableDirectionalRemeshKernel, self).autotune(name=name, - position=position, scalars_in=scalars_in, scalars_out=scalars_out, - is_inplace=is_inplace, min_s_ghosts=min_s_ghosts, - precision=precision, nscalars=nscalars, group_scalars=group_scalars, - remesh_kernel=remesh_kernel, remesh_criteria_eps=remesh_criteria_eps, - force_atomics=force_atomics, min_nparticles=min_nparticles, ftype=ftype, - scalar_cfl=scalar_cfl, kernel_args=kernel_args, mesh_info_vars=mesh_info_vars, - work_dim=work_dim, work_size=work_size, - known_args=known_args, hardcode_arrays=hardcode_arrays, - offset_dtype=offset_dtype, strides_dtype=strides_dtype, - isolation_params=isolation_params, **kwds) - + position=position, scalars_in=scalars_in, scalars_out=scalars_out, + is_inplace=is_inplace, min_s_ghosts=min_s_ghosts, + precision=precision, nscalars=nscalars, group_scalars=group_scalars, + remesh_kernel=remesh_kernel, remesh_criteria_eps=remesh_criteria_eps, + force_atomics=force_atomics, min_nparticles=min_nparticles, ftype=ftype, + scalar_cfl=scalar_cfl, kernel_args=kernel_args, mesh_info_vars=mesh_info_vars, + work_dim=work_dim, work_size=work_size, + known_args=known_args, hardcode_arrays=hardcode_arrays, + offset_dtype=offset_dtype, strides_dtype=strides_dtype, + isolation_params=isolation_params, **kwds) def compute_args_mapping(self, extra_kwds, extra_parameters): """ @@ -206,87 +207,86 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): arg_position being an int and arg_type(s) a type or tuple of types which will be checked against. 
""" - - is_inplace = extra_kwds['is_inplace'] - scalars_in = extra_kwds['scalars_in'] - scalars_out = extra_kwds['scalars_out'] - nscalars = extra_kwds['nscalars'] - strides_dtype = extra_kwds['strides_dtype'] - offset_dtype = extra_kwds['offset_dtype'] + + is_inplace = extra_kwds['is_inplace'] + scalars_in = extra_kwds['scalars_in'] + scalars_out = extra_kwds['scalars_out'] + nscalars = extra_kwds['nscalars'] + strides_dtype = extra_kwds['strides_dtype'] + offset_dtype = extra_kwds['offset_dtype'] hardcode_arrays = extra_kwds['hardcode_arrays'] - + args_mapping = {} arg_index = 0 args_mapping['position_base'] = (0, cl.MemoryObjectHolder) arg_index += 1 if not hardcode_arrays: - args_mapping['position_strides'] = (1, strides_dtype) - args_mapping['position_offset'] = (2, offset_dtype) + args_mapping['position_strides'] = (1, strides_dtype) + args_mapping['position_offset'] = (2, offset_dtype) arg_index += 2 if is_inplace: - for (i,dsinout) in enumerate(scalars_in): + for (i, dsinout) in enumerate(scalars_in): for j in xrange(dsinout.nb_components): - prefix = 'S{}_{}_inout'.format(i,j) + prefix = 'S{}_{}_inout'.format(i, j) args_mapping[prefix+'_base'] = (arg_index, cl.MemoryObjectHolder) - arg_index+=1 + arg_index += 1 if not hardcode_arrays: args_mapping[prefix+'_strides'] = (arg_index+0, strides_dtype) - args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) + args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) arg_index += 2 else: - for (i,dsin) in enumerate(scalars_in): + for (i, dsin) in enumerate(scalars_in): for j in xrange(dsin.nb_components): - prefix = 'S{}_{}_in'.format(i,j) - args_mapping[prefix+'_base'] = (arg_index, cl.MemoryObjectHolder) + prefix = 'S{}_{}_in'.format(i, j) + args_mapping[prefix+'_base'] = (arg_index, cl.MemoryObjectHolder) arg_index += 1 if not hardcode_arrays: args_mapping[prefix+'_strides'] = (arg_index+0, strides_dtype) - args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) + args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) arg_index += 2 - for (i,dsout) in enumerate(scalars_out): + for (i, dsout) in enumerate(scalars_out): for j in xrange(dsout.nb_components): - prefix = 'S{}_{}_out'.format(i,j) - args_mapping[prefix+'_base'] = (arg_index, cl.MemoryObjectHolder) + prefix = 'S{}_{}_out'.format(i, j) + args_mapping[prefix+'_base'] = (arg_index, cl.MemoryObjectHolder) arg_index += 1 if not hardcode_arrays: args_mapping[prefix+'_strides'] = (arg_index+0, strides_dtype) - args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) + args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) arg_index += 2 - assert len(args_mapping)==arg_index + assert len(args_mapping) == arg_index assert arg_index == (1+2*(1-hardcode_arrays))*(1+(2-is_inplace)*nscalars) return args_mapping - - def compute_parameters(self, extra_kwds): + def compute_parameters(self, extra_kwds): """Register extra parameters to optimize.""" check_instance(extra_kwds, dict, keys=str) - ftype = extra_kwds['ftype'] - work_dim = extra_kwds['work_dim'] - precision = extra_kwds['precision'] + ftype = extra_kwds['ftype'] + work_dim = extra_kwds['work_dim'] + precision = extra_kwds['precision'] force_atomics = extra_kwds['force_atomics'] - nparticles_options = [1,2,4,8,16] + nparticles_options = [1, 2, 4, 8, 16] use_atomics_options = [True] if force_atomics else [True, False] if True in use_atomics_options: cl_env = self.cl_env - msg=None + msg = None if ftype == 'float': if not cl_env.has_extension('cl_khr_local_int32_base_atomics'): - msg='OpenCL device {} does 
not support int32 atomics ' - msg+='(cl_khr_local_int32_base_atomics).' - msg=msg.format(cl_env.device.name) + msg = 'OpenCL device {} does not support int32 atomics ' + msg += '(cl_khr_local_int32_base_atomics).' + msg = msg.format(cl_env.device.name) elif ftype == 'double': if not cl_env.has_extension('cl_khr_int64_base_atomics'): - msg='OpenCL device {} does not support int64 atomics ' - msg+='(cl_khr_int64_base_atomics).' - msg=msg.format(cl_env.device.name) + msg = 'OpenCL device {} does not support int64 atomics ' + msg += '(cl_khr_int64_base_atomics).' + msg = msg.format(cl_env.device.name) else: msg = 'Atomic remeshing kernel has not been implemented for ' - msg+= '{} yet.'.format(precision) + msg += '{} yet.'.format(precision) if msg: if force_atomics: @@ -297,113 +297,112 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): msg += '\nAtomic version of the remeshing kernel will be disabled.' warnings.warn(msg, CodeGeneratorWarning) use_atomics_options.remove(True) - + autotuner_flag = self.autotuner_config.autotuner_flag if (autotuner_flag == AutotunerFlags.ESTIMATE): if True in use_atomics_options: use_atomics_options.pop(False) - max_workitem_workload = [1,1,1] + max_workitem_workload = [1, 1, 1] elif (autotuner_flag == AutotunerFlags.MEASURE): - max_workitem_workload = [1,8,1] + max_workitem_workload = [1, 8, 1] elif (autotuner_flag == AutotunerFlags.PATIENT): - max_workitem_workload = [1,8,8] + max_workitem_workload = [1, 8, 8] elif (autotuner_flag == AutotunerFlags.EXHAUSTIVE): - max_workitem_workload = [1,16,16] + max_workitem_workload = [1, 16, 16] max_workitem_workload = npw.asarray(max_workitem_workload[:work_dim]) extra_kwds['max_work_load'] = max_workitem_workload - + params = super(OpenClAutotunableDirectionalRemeshKernel, self).compute_parameters( - extra_kwds=extra_kwds) + extra_kwds=extra_kwds) params.register_extra_parameter('use_atomics', use_atomics_options) params.register_extra_parameter('nparticles', nparticles_options) return params - + def compute_min_max_wg_size(self, work_bounds, work_load, global_work_size, - extra_parameters, extra_kwds): + extra_parameters, extra_kwds): """Default min and max workgroup size.""" cache_ghosts = extra_kwds['min_s_ghosts'] - min_wg_size = npw.ones(shape=work_bounds.work_dim, dtype=npw.int32) + min_wg_size = npw.ones(shape=work_bounds.work_dim, dtype=npw.int32) min_wg_size[0] = max(min_wg_size[0], 2*cache_ghosts) max_wg_size = npw.ones(shape=work_bounds.work_dim, dtype=npw.int32) max_wg_size[0] = max(global_work_size[0], min_wg_size[0]) return (min_wg_size, max_wg_size) - + def compute_global_work_size(self, local_work_size, work, extra_parameters, extra_kwds): gs = super(OpenClAutotunableDirectionalRemeshKernel, self).compute_global_work_size( - local_work_size=local_work_size, work=work, - extra_parameters=extra_parameters, extra_kwds=extra_kwds) + local_work_size=local_work_size, work=work, + extra_parameters=extra_parameters, extra_kwds=extra_kwds) gs[0] = local_work_size[0] return gs def generate_kernel_src(self, global_work_size, local_work_size, - extra_parameters, extra_kwds, tuning_mode, dry_run): + extra_parameters, extra_kwds, tuning_mode, dry_run): """Generate kernel name and source code""" - ## Extract usefull variables - ftype = extra_kwds['ftype'] - work_dim = extra_kwds['work_dim'] - nscalars = extra_kwds['nscalars'] - known_args = extra_kwds['known_args'] - is_inplace = extra_kwds['is_inplace'] - scalar_cfl = extra_kwds['scalar_cfl'] - remesh_kernel = extra_kwds['remesh_kernel'] - 
group_scalars = extra_kwds['group_scalars'] - mesh_info_vars = extra_kwds['mesh_info_vars'] - min_nparticles = extra_kwds['min_nparticles'] + # Extract usefull variables + ftype = extra_kwds['ftype'] + work_dim = extra_kwds['work_dim'] + nscalars = extra_kwds['nscalars'] + known_args = extra_kwds['known_args'] + is_inplace = extra_kwds['is_inplace'] + scalar_cfl = extra_kwds['scalar_cfl'] + remesh_kernel = extra_kwds['remesh_kernel'] + group_scalars = extra_kwds['group_scalars'] + mesh_info_vars = extra_kwds['mesh_info_vars'] + min_nparticles = extra_kwds['min_nparticles'] remesh_criteria_eps = extra_kwds['remesh_criteria_eps'] - ## Extract and check parameters - nparticles = extra_parameters['nparticles'] + # Extract and check parameters + nparticles = extra_parameters['nparticles'] use_atomics = extra_parameters['use_atomics'] if (not use_atomics) and (nparticles < min_nparticles): - msg='Insufficient number of particles, min={}, got {}.' - msg=msg.format(min_nparticles, nparticles) + msg = 'Insufficient number of particles, min={}, got {}.' + msg = msg.format(min_nparticles, nparticles) raise KernelGenerationError(msg) - ## Get compile time OpenCL known variables + # Get compile time OpenCL known variables known_vars = super(OpenClAutotunableDirectionalRemeshKernel, self).generate_kernel_src( - global_work_size=global_work_size, - local_work_size=local_work_size, - extra_parameters=extra_parameters, - extra_kwds=extra_kwds, tuning_mode=tuning_mode, dry_run=dry_run) + global_work_size=global_work_size, + local_work_size=local_work_size, + extra_parameters=extra_parameters, + extra_kwds=extra_kwds, tuning_mode=tuning_mode, dry_run=dry_run) known_vars.update(mesh_info_vars) known_vars.update(known_args) - + # disable periodic-periodic because we exchange ghosts anyways sboundaries = (BoundaryCondition.NONE, BoundaryCondition.NONE,) - - ## Generate OpenCL source code - codegen = DirectionalRemeshKernelGenerator(typegen=self.typegen, - work_dim=work_dim, ftype=ftype, - nparticles=nparticles, nscalars=nscalars, - sboundary=sboundaries, is_inplace=is_inplace, - scalar_cfl=scalar_cfl, remesh_kernel=remesh_kernel, - group_scalars=group_scalars, - remesh_criteria_eps=remesh_criteria_eps, - use_atomics = use_atomics, - symbolic_mode = self.symbolic_mode, - tuning_mode = tuning_mode, - debug_mode = False, - known_vars = known_vars) - + + # Generate OpenCL source code + codegen = DirectionalRemeshKernelGenerator(typegen=self.typegen, + work_dim=work_dim, ftype=ftype, + nparticles=nparticles, nscalars=nscalars, + sboundary=sboundaries, is_inplace=is_inplace, + scalar_cfl=scalar_cfl, remesh_kernel=remesh_kernel, + group_scalars=group_scalars, + remesh_criteria_eps=remesh_criteria_eps, + use_atomics=use_atomics, + symbolic_mode=self.symbolic_mode, + tuning_mode=tuning_mode, + debug_mode=False, + known_vars=known_vars) + kernel_name = codegen.name - kernel_src = str(codegen) + kernel_src = str(codegen) - ## Check if cache would fit + # Check if cache would fit if (local_work_size is not None): self.check_cache(codegen.required_workgroup_cache_size(local_work_size)[2]) return (kernel_name, kernel_src) - + def hash_extra_kwds(self, extra_kwds): """Hash extra_kwds dictionnary for caching purposes.""" - kwds = ('remesh_criteria_eps', 'nscalars', 'ftype', + kwds = ('remesh_criteria_eps', 'nscalars', 'ftype', 'is_inplace', 'remesh_kernel', 'work_size', 'known_args') - return self.custom_hash(*tuple(extra_kwds[kwd] for kwd in kwds), - mesh_info_vars=extra_kwds['mesh_info_vars']) - + return 
self.custom_hash(*tuple(extra_kwds[kwd] for kwd in kwds), + mesh_info_vars=extra_kwds['mesh_info_vars']) diff --git a/hysop/numerics/remesh/remesh.py b/hysop/numerics/remesh/remesh.py index bf2c38bd199f757eb93eb1be004ff448814a2da7..c830e0d1dbce0a9c7e844a4b25693e1987a28a3e 100644 --- a/hysop/numerics/remesh/remesh.py +++ b/hysop/numerics/remesh/remesh.py @@ -1,87 +1,101 @@ from hysop.constants import __VERBOSE__, __DEBUG__ -from hysop.tools.enum import EnumFactory +from hysop.tools.enum import EnumFactory from hysop.tools.types import check_instance from hysop.numerics.remesh.kernel_generator import Kernel, SymmetricKernelGenerator -Remesh = EnumFactory.create('Remesh', - ['L1_0', 'L2_1','L2_2','L4_2','L4_4','L6_4','L6_6','L8_4', # lambda remesh kernels - 'L2_1s','L2_2s','L4_2s','L4_4s','L6_4s','L6_6s','L8_4s', # splitted lambda remesh kernels - 'Mp4', 'Mp6', 'Mp8', # Mprimes kernels: Mp4 = M'4 = L2_1 and Mp6 = M'6 = L4_2 +Remesh = EnumFactory.create( + 'Remesh', + ['L1_0', 'L2_1', 'L2_2', 'L4_2', 'L4_4', 'L6_4', 'L6_6', 'L8_4', # lambda remesh kernels + 'L2_1s', 'L2_2s', 'L4_2s', 'L4_4s', 'L6_4s', 'L6_6s', 'L8_4s', # splitted lambda remesh kernels + 'Mp4', 'Mp6', 'Mp8', # Mprimes kernels: Mp4 = M'4 = L2_1 and Mp6 = M'6 = L4_2 + 'M4', 'M8', # M kernels 'O2', 'O4', # Corrected kernels, allow a large CFL number 'L2', # Corrected and limited lambda 2 ]) + class RemeshKernelGenerator(SymmetricKernelGenerator): - def configure(self,n): - return super(RemeshKernelGenerator,self).configure(n=n, H=None) + def configure(self, n): + return super(RemeshKernelGenerator, self).configure(n=n, H=None) + class RemeshKernel(Kernel): def __init__(self, moments, regularity, - verbose = __DEBUG__, - split_polys=False, - override_cache=False): + verbose=__DEBUG__, + split_polys=False, + override_cache=False): generator = RemeshKernelGenerator(verbose=verbose) generator.configure(n=moments) kargs = generator.solve(r=regularity, override_cache=override_cache, - split_polys=split_polys, no_wrap=True) + split_polys=split_polys, no_wrap=True) - super(RemeshKernel,self).__init__(**kargs) + super(RemeshKernel, self).__init__(**kargs) @staticmethod def from_enum(remesh): check_instance(remesh, Remesh) remesh = str(remesh) - assert remesh[0]=='L' and (remesh!='L2'), \ - 'Only lambda remesh kernels are supported.' - remesh=remesh[1:] - if remesh[-1] == 's': - remesh = remesh[:-1] - split_polys=True + assert remesh[0] == 'L' and (remesh != 'L2') or (remesh in ('M4', 'M8')), \ + 'Only lambda remesh kernels are supported.' 
+        if remesh in ('M4', 'M8'):
+            # hand-built M kernels (cubic B-spline M4; M8 is declared in the
+            # enum but has no polynomial definition here yet)
+            from hysop.deps import sm
+            x = sm.abc.x
+            if remesh == 'M4':
+                # B-spline pieces on [0,1] and [1,2]; the full symmetric
+                # support [-2,2] is assembled below by mirroring with x -> -x.
+                M4 = (sm.Poly((1/sm.Rational(6))*((2-x)**3-4*(1-x)**3), x),
+                      sm.Poly((1/sm.Rational(6))*((2-x)**3), x))
+                return Kernel(n=2, r=4, deg=3, Ms=2, Mh=None, H=None, remesh=True,
+                              P=(M4[1].subs(x, -x), M4[0].subs(x, -x), M4[0], M4[1]))
+            else:
+                msg = 'Remesh kernel {} has not been implemented yet.'.format(remesh)
+                raise NotImplementedError(msg)
         else:
-            split_polys=False
-        remesh = [int(x) for x in remesh.split('_')]
-        assert len(remesh) == 2
-        assert remesh[0] >= 1
-        assert remesh[1] >= 0
-        return RemeshKernel(remesh[0], remesh[1], split_polys=split_polys)
+            remesh = remesh[1:]
+            if remesh[-1] == 's':
+                remesh = remesh[:-1]
+                split_polys = True
+            else:
+                split_polys = False
+            remesh = [int(x) for x in remesh.split('_')]
+            assert len(remesh) == 2
+            assert remesh[0] >= 1
+            assert remesh[1] >= 0
+            return RemeshKernel(remesh[0], remesh[1], split_polys=split_polys)
 
     def __str__(self):
         return 'RemeshKernel(n={}, r={}, split={})'.format(self.n, self.r, self.poly_splitted)
 
 
-if __name__=='__main__':
+if __name__ == '__main__':
     import numpy as np
     from matplotlib import pyplot as plt
 
-    for i in xrange(1,5):
+    for i in xrange(1, 5):
         p = 2*i
         kernels = []
-        for r in [1,2,4,8]:
+        for r in [1, 2, 4, 8]:
             try:
-                kernel = RemeshKernel(p,r)
+                kernel = RemeshKernel(p, r)
                 kernels.append(kernel)
             except RuntimeError:
-                print 'Solver failed for p={} and r={}.'.format(p,r)
+                print 'Solver failed for p={} and r={}.'.format(p, r)
 
-        if len(kernels)==0:
+        if len(kernels) == 0:
             continue
 
         k0 = kernels[0]
 
         fig = plt.figure()
         plt.xlabel(r'$x$')
-        plt.ylabel(r'$\Lambda_{'+'{},{}'.format(p,'r')+'}$')
-        X = np.linspace(-k0.Ms-1,+k0.Ms+1,1000)
-        s = plt.subplot(1,1,1)
-        for i,k in enumerate(kernels):
-            s.plot(X,k(X),label=r'$\Lambda_{'+'{},{}'.format(p,k.r)+'}$')
-        s.plot(k0.I,k0.H,'or')
+        plt.ylabel(r'$\Lambda_{'+'{},{}'.format(p, 'r')+'}$')
+        X = np.linspace(-k0.Ms-1, +k0.Ms+1, 1000)
+        s = plt.subplot(1, 1, 1)
+        for i, k in enumerate(kernels):
+            s.plot(X, k(X), label=r'$\Lambda_{'+'{},{}'.format(p, k.r)+'}$')
+        s.plot(k0.I, k0.H, 'or')
         axe_scaling = 0.10
         ylim = s.get_ylim()
         Ly = ylim[1] - ylim[0]
-        s.set_ylim(ylim[0]-axe_scaling*Ly,ylim[1]+axe_scaling*Ly)
+        s.set_ylim(ylim[0]-axe_scaling*Ly, ylim[1]+axe_scaling*Ly)
         s.legend()
 
         plt.show(block=True)
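
The two polynomials hard-coded in the new 'M4' branch of RemeshKernel.from_enum are the pieces of the cubic B-spline on [0,1] and [1,2]. The standalone check below (plain sympy, outside of hysop; the names p0, p1 and weights are illustrative only, not part of the patch) verifies the properties the remeshing code relies on: the four weights seen by a particle sum to one, the kernel is continuous at |x| = 1, and its support ends at |x| = 2.

    # Sanity check of the M4 (cubic B-spline) pieces used in from_enum above.
    # Assumes only sympy; p0/p1 are copied from the patch.
    import sympy as sm

    x = sm.symbols('x')
    p0 = sm.Rational(1, 6)*((2 - x)**3 - 4*(1 - x)**3)   # kernel for 0 <= |x| <= 1
    p1 = sm.Rational(1, 6)*(2 - x)**3                    # kernel for 1 <= |x| <= 2

    # For a particle at x in [0, 1], the four surrounding grid points lie at
    # distances 1+x, x, 1-x and 2-x, so their remeshing weights are:
    weights = (p1.subs(x, 1 + x), p0, p0.subs(x, 1 - x), p1.subs(x, 2 - x))

    assert sm.simplify(sum(weights) - 1) == 0                    # partition of unity
    assert p0.subs(x, 0) == sm.Rational(2, 3)                    # M4(0) = 2/3
    assert p0.subs(x, 1) == p1.subs(x, 1) == sm.Rational(1, 6)   # continuity at |x| = 1
    assert p1.subs(x, 2) == 0                                    # support half-width is 2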
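For 'M4', from_enum returns a plain kernel_generator.Kernel rather than a RemeshKernel, which is why the autotunable remesh kernel above now accepts both types. A minimal usage sketch, assuming a working hysop (Python 2) environment and that Remesh enum values are passed directly as in existing callers:

    # Sketch only, not part of the patch.
    from hysop.numerics.remesh.remesh import Remesh, RemeshKernel
    from hysop.numerics.remesh.kernel_generator import Kernel

    k_lambda = RemeshKernel.from_enum(Remesh.L4_2)   # generator-based lambda kernel
    k_m4 = RemeshKernel.from_enum(Remesh.M4)         # hand-built cubic B-spline

    assert isinstance(k_lambda, RemeshKernel)
    assert isinstance(k_m4, Kernel) and not isinstance(k_m4, RemeshKernel)
    print 'M4 support half-width: {}'.format(k_m4.Ms)   # 2, used to size ghost layers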