diff --git a/hysop/codegen/kernels/stretching.py b/hysop/codegen/kernels/stretching.py
index 346edb25fc00b8737ba761b0c5cd0621e8046256..5842317e6344a701b30f7b19149f49dc91155b16 100644
--- a/hysop/codegen/kernels/stretching.py
+++ b/hysop/codegen/kernels/stretching.py
@@ -14,6 +14,10 @@ from hysop.codegen.functions.compute_index import ComputeIndexFunction
 from hysop.codegen.functions.gradient import GradientFunction
 
 class CachedStretchingKernel(KernelCodeGenerator):
+
+    @staticmethod
+    def codegen_name(ftype,work_dim):
+        return 'cached_stretching_{}_{}d'.format(ftype,work_dim)
 
     def __init__(self, typegen, work_dim,
             order=2, ftype=None,
@@ -27,7 +31,7 @@ class CachedStretchingKernel(KernelCodeGenerator):
         kernel_reqs = self.build_requirements(typegen,work_dim, order, cached)
         kernel_args = self.gen_kernel_arguments(typegen, work_dim, ftype, kernel_reqs)
 
-        name = 'cached_stretching_{}_{}d'.format(ftype,work_dim)
+        name = CachedStretchingKernel.codegen_name(ftype, work_dim)
 
         super(CachedStretchingKernel,self).__init__(
                 name=name,
                 typegen=typegen,
diff --git a/hysop/codegen/maths/stencil/solver.py b/hysop/codegen/maths/stencil/solver.py.todo
similarity index 100%
rename from hysop/codegen/maths/stencil/solver.py
rename to hysop/codegen/maths/stencil/solver.py.todo
diff --git a/hysop/gpu/gpu_stretching.py b/hysop/gpu/gpu_stretching.py
index 013386c75a78175e9824942feaa9106fbdb4fe9d..4efd80dae257dcd5e037873854f4b6af3c39fffb 100644
--- a/hysop/gpu/gpu_stretching.py
+++ b/hysop/gpu/gpu_stretching.py
@@ -51,6 +51,15 @@ class GPUStretching(DiscreteOperator, GPUOperator):
             device_id = get_extra_args_from_method(self, 'device_id', None),
             device_type = get_extra_args_from_method(self, 'device_type', None),
             **kwds)
+
+        # Build options
+        build_opts = []
+        build_opts += get_extra_args_from_method(self, 'build_opts', [])
+        self.build_opts = filter(None, set(build_opts))
+
+        # Kernel autotuner and caching options
+        self.autotuner_runs = get_extra_args_from_method(self, 'autotuner_runs', 10)
+        self.force_renew_cache = get_extra_args_from_method(self, 'force_renew_cache', False)
 
         # discrete fields
         self.vorticity = vorticity
@@ -124,6 +133,10 @@ class GPUStretching(DiscreteOperator, GPUOperator):
         mesh = topo.mesh
         gwi = mesh.resolution
 
+        build_opts = self.build_opts
+        autotuner_runs = self.autotuner_runs
+        force_renew_cache = self.force_renew_cache
+
         dt = typegen.make_floatn([0.01],1)
 
         from hysop.codegen.structs.mesh_info import MeshInfoStruct
@@ -131,15 +144,6 @@ class GPUStretching(DiscreteOperator, GPUOperator):
         mesh_info_buffer = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                 hostbuf=mesh_info)
 
-        #velocity = [ cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
-                #hostbuf=np.empty(shape=gwi, dtype=dt.dtype)) for _ in xrange(work_dim) ]
-        #vorticity = [ cl.Buffer(context, cl.mem_flags.COPY_HOST_PTR,
-                #hostbuf=np.empty(shape=gwi, dtype=dt.dtype)) for _ in xrange(work_dim) ]
-        #kernel_args = [dt] \
-            #+ vorticity \
-            #+ velocity \
-            #+ [mesh_info_buffer] \
-            #+ [None] #local memory buffer
 
         kernel_args = [dt] \
                 + self.vorticity.gpu_data \
@@ -147,10 +151,9 @@ class GPUStretching(DiscreteOperator, GPUOperator):
                 + [mesh_info_buffer] \
                 + [None] #local memory buffer
 
-        build_opts = []
-
-        autotuner = KernelAutotuner('stretching3d_cached',work_dim,typegen.fbtype,build_opts,
-                nruns=10,force_renew_cache=False)
+        name = CachedStretchingKernel.codegen_name(typegen.fbtype, work_dim)
+        autotuner = KernelAutotuner(name,work_dim,typegen.fbtype,build_opts,
+                nruns=autotuner_runs,force_renew_cache=force_renew_cache)
         autotuner.add_filter('3d_shape', autotuner.min_workitems_per_direction)
 
         (gwi, lwi, stats) = autotuner.bench(context, platform, device, gwi, kernel_args,
@@ -160,12 +163,6 @@ class GPUStretching(DiscreteOperator, GPUOperator):
         (kernel, kernel_args, cached_bytes) = self._gen_and_build_kernel(lwi, gwi, kernel_args,
                 dump_src=True, verbose=True)
 
-        #for field in velocity:
-            #field.release()
-        #for field in vorticity:
-            #field.release()
-        #kernel_args[1 :1+ work_dim] = self.vorticity.gpu_data
-        #kernel_args[1+work_dim:1+2*work_dim] = self.velocity.gpu_data
 
         self.kernel_args = kernel_args
         self.size_local_alloc += cached_bytes
@@ -198,6 +195,8 @@ class GPUStretching(DiscreteOperator, GPUOperator):
         if dump_src:
             dump_folder=IO.default_path()+'/generated_kernels'
             codegen.to_file(dump_folder, codegen.name+'.cl')
+            if __VERBOSE__ or __DEBUG__:
+                print '{} source dumped to {}/{}.'.format(codegen.name, dump_folder, codegen.name+'.cl')
 
         cached_bytes = codegen.cache_alloc_bytes(local_size)
         local_mem = cl.LocalMemory(cached_bytes)
@@ -222,7 +221,7 @@ class GPUStretching(DiscreteOperator, GPUOperator):
         output_events = [stretching_evt]
 
         for var in self.variables:
-            var.events.append(output_events)
+            var.events += output_events
 
     def apply(self, simulation):
         self._compute(simulation)
diff --git a/hysop/gpu/tools.py b/hysop/gpu/tools.py
index aa0bebd4dd3a1cc7ed129d060e7a3b89e8922894..6381d8cfc2114d794b91a85bdd678f05c9d29597 100644
--- a/hysop/gpu/tools.py
+++ b/hysop/gpu/tools.py
@@ -148,15 +148,19 @@ class KernelAutotuner(object):
 
         if __DEBUG__ or __VERBOSE__:
             print '== Kernel {} Autotuning =='.format(self.name)
-            print 'ctx : {}'.format(ctx)
-            print 'device: {}'.format(device)
+            print 'platform: {}'.format(platform.name)
+            print 'device: {}'.format(device.name)
+            print 'ctx: {}'.format(ctx)
             print 'global_size: {}'.format(global_size)
+            print 'fbtype: {}'.format(self.fbtype)
+            print 'build_opts: {}'.format(self.build_opts)
 
         config = KernelAutotuner._make_config_key(platform,device,self.fbtype,self.build_opts)
         if config not in self.configs.keys():
             self.configs[config] = {'ctx':str(ctx),'device':str(device),'fbtype':self.fbtype,'build_opts':self.build_opts}
-            self.results[config] = {}
             self._update_configs()
+        if config not in self.results.keys():
+            self.results[config] = {}
         results = self.results[config]
 
         dump_cache = False
diff --git a/hysop/numerics/finite_differences.py b/hysop/numerics/finite_differences.py
index dbf51487dd75011a5000718a51668e13639fe095..42bc646cabede1d0d018d1b515cd35fdef9af2c4 100644
--- a/hysop/numerics/finite_differences.py
+++ b/hysop/numerics/finite_differences.py
@@ -3,8 +3,9 @@
 .. currentmodule hysop.numerics.finite_differences
 
 * :class:`~FDC2` : first derivative, 2nd order centered scheme
-* :class:`~FD2C2`: second derivative, 2nd order centered scheme
 * :class:`~FDC4`: first derivative, 4th order centered scheme
+* :class:`~FD2C2`: second derivative, 2nd order centered scheme
+* :class:`~FDC`: generic centered scheme
 * :class:`~FiniteDifference`, abstract base class.
 
@@ -31,7 +32,7 @@ Note
 * fd are computed only on points described by indices :
   result[ind] = diff(field[ind]), which means that
   result and field must have the same shape.
-* You can also used a 'reduced' output result to minimize memory.
+* You can also use a 'reduced' output result to minimize memory.
   Use 'reduce_output_shape=True' in scheme init. In that case,
   result must be of a shape corresponding to indices 'shape'.
@@ -145,7 +146,7 @@ class FiniteDifference(object):
                 self.output_indices = [slice(0, result_shape[i])
                                        for i in xrange(len(indices))]
             else:
-                assert False
+                raise ValueError('Invalid value for indices_out!')
 
         self._step = step
         self._compute_indices(step)
@@ -181,7 +182,7 @@ class FiniteDifference(object):
 
     @abstractmethod
     def compute_and_add(self, tab, cdir, result, work):
-        """Apply FD scheme and add the abs of the result inplace.
+        """Apply FD scheme and add the result inplace.
 
         Parameters
         ----------
@@ -244,7 +245,7 @@ class FDC2(FiniteDifference):
         return result
 
     def compute_and_add(self, tab, cdir, result, work):
-        """Apply FD scheme and add np.abs of the derivative inplace.
+        """Apply FD scheme and add the derivative inplace.
 
         Parameters
         ----------
@@ -403,7 +404,6 @@ class FDC4(FiniteDifference):
     ghosts_layer_size = 2
 
     def _compute_indices(self, step):
-
         self._m1 = []
         self._m2 = []
         self._a1 = []
diff --git a/hysop/numerics/stencil.py b/hysop/numerics/stencil.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf179c9a07166910d632dc02835d6f8d85808578
--- /dev/null
+++ b/hysop/numerics/stencil.py
@@ -0,0 +1,167 @@
+
+import math, hashlib
+import itertools as it
+
+import numpy as np
+import sympy as sm
+
+import scipy as sp
+import scipy.sparse
+
+import gmpy2 as gmp
+from gmpy2 import mpq
+
+def _mpqize(x):
+    return mpq(x.p,x.q)
+mpqize = np.vectorize(_mpqize)
+
+class Stencil(object):
+    """
+    Stencil used for arbitrary dimension finite differences.
+
+    Parameters
+    ----------
+    coeffs: array_like
+        Coefficients of the stencil.
+    origin: array_like
+        Origin of the stencil from left-most point.
+    order: int
+        Order of the stencil.
+    dim: int
+        Spatial dimension of the stencil.
+    dtype: np.dtype
+        Data type of the stencil coefficients.
+    shape: np.ndarray
+        Shape of the stencil.
+    L: np.ndarray
+        Distance from origin to left-most point included in stencil support.
+    R: np.ndarray
+        Distance from origin to right-most point included in stencil support.
+    """
+
+    def __init__(self, coeffs, origin, order):
+        """
+        Stencil used for arbitrary dimension finite differences.
+
+        Parameters
+        ----------
+        coeffs: array_like
+            Coefficients of the stencil.
+        origin: array_like
+            Origin of the stencil from left-most point.
+        order: int
+            Order of the stencil.
+
+        Raises
+        ------
+        ValueError
+            Raised if any component of `origin` is negative.
+
+        See Also
+        --------
+        StencilGenerator: Generate Stencil objects
+        """
+        stencil = np.asarray(coeffs)
+        origin = np.asarray(origin)
+        order = np.asarray(order)
+
+        dim = stencil.ndim
+        dtype = stencil.dtype
+        shape = stencil.shape
+
+        if (origin<0).any():
+            raise ValueError('Origin component < 0!\norigin={}'.format(origin))
+        L = origin
+        R = shape-origin-1
+
+        self.stencil = stencil
+        self.origin = origin
+        self.order = order
+
+        self.dim = dim
+        self.dtype = dtype
+        self.shape = shape
+
+        self.L = L
+        self.R = R
+
+    def coo_stencil(self):
+        """
+        Return a 2d stencil as a sparse coordinate matrix.
+        @return : scipy.sparse.coo_matrix
+        """
+        assert self.dim==2
+        return sp.sparse.coo_matrix(self.stencil,shape=self.shape,dtype=self.dtype)
+
+    def iteritems(self):
+        """
+        Return an (offset,coefficient) iterator iterating on all non-zero coefficients.
+        Offset is taken from origin.
+        @return : itertools.iterator
+        """
+        iterator = np.ndindex(self.shape)
+        iterator = it.imap(lambda x: (x-self.origin,self.stencil[x]), iterator)
+        iterator = it.ifilter(lambda x: x[1]!=0, iterator)
+        return iterator
+
+    def hash(self, keep_only=8):
+        """
+        Hash the stencil with its origin, order and coefficients.
+        The hash algorithm used is sha1.
+        By default only the first 8 chars are kept from the generated hash.
+        @parameter keep_only : number of chars kept from the hashed hexadecimal string.
+        @return : hexadecimal hash string
+        """
+        stencil = self.stencil
+        m = hashlib.sha1()
+        m.update(self.origin)
+        m.update(self.order)
+        for d in stencil:
+            m.update(str(d))
+        return m.hexdigest()[:keep_only]
+
+    def is_centered(self,axe=None):
+        """
+        Check if the stencil is centered.
+        @return : bool
+        """
+        if axe is None:
+            return (self.L==self.R).all()
+        else:
+            return (self.L[axe]==self.R[axe]).all()
+
+    def is_cross(self):
+        """
+        Check if the stencil is a cross (zeros everywhere except on a nd cross).
+        @return : bool
+        """
+        mask = np.ones(self.shape,dtype=bool)
+        access = self.origin.tolist()
+        for i in xrange(self.dim):
+            acc = [x for x in access]
+            acc[i] = slice(0,self.shape[i])
+            mask[acc] = False
+        return not np.any(self.stencil*mask)
+
+
+class StencilGenerator(object):
+    """
+    Generate a stencil from its order and derivative order.
+    Results are cached and compressed locally.
+    """
+
+    def __init__(self,dim,dtype=mpq):
+        """
+        Initialize a StencilGenerator to generate stencils of dimension :attr:`dim` and type :attr:`dtype`.
+        Results are cached and compressed locally.
+        """
+        self.dim = dim
+        self.dtype = dtype
+
+        #L = L if np.isscalar(L) else np.asarray(L)
+        #if R is None:
+            #R = k+order-L-1
+            #assert((R>=0).all(), 'R<0...')
+        #else:
+            #assert((L+R+1 == k+order).all()), 'L+R+1 != k+order, cannot compute coefficients !'
+            #R = R if np.isscalar(R) else np.asarray(R)
+        #N = L+R+1
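
A minimal usage sketch for the new Stencil class introduced in hysop/numerics/stencil.py above (illustrative only, not part of the patch): it assumes the module is importable from an installed hysop tree, and the coefficient values are just an example of a 2nd order centered first-derivative scheme.

    import numpy as np
    from hysop.numerics.stencil import Stencil

    # Centered 2nd order first-derivative stencil (f[i+1]-f[i-1])/(2*dx),
    # i.e. coefficients [-1/2, 0, +1/2] with the origin on the middle point.
    stencil = Stencil(np.array([-0.5, 0.0, +0.5]), origin=[1], order=2)

    print stencil.L, stencil.R     # [1] [1]: one point on each side of the origin
    print stencil.is_centered()    # True
    for offset, coeff in stencil.iteritems():
        print offset, coeff        # non-zero coefficients with their offset from origin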