From 7f27e04d6f3fa1c98c95fb695a468e45c3dc9ae8 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Keck <Jean-Baptiste.Keck@imag.fr> Date: Sat, 8 Dec 2018 15:41:28 +0100 Subject: [PATCH] added error_on_allocation flag for fft backends --- examples/sediment_deposit/sediment_deposit.py | 37 +++++++-------- hysop/fields/cartesian_discrete_field.py | 20 ++++++-- hysop/numerics/fft/fft.py | 33 +++++++++---- hysop/numerics/fft/gpyfft_fft.py | 46 ++++++++++++------- hysop/numerics/fft/host_fft.py | 2 + hysop/numerics/fft/opencl_fft.py | 4 +- hysop/operator/base/spectral_operator.py | 2 +- 7 files changed, 93 insertions(+), 51 deletions(-) diff --git a/examples/sediment_deposit/sediment_deposit.py b/examples/sediment_deposit/sediment_deposit.py index bbc005220..848f03001 100644 --- a/examples/sediment_deposit/sediment_deposit.py +++ b/examples/sediment_deposit/sediment_deposit.py @@ -3,9 +3,10 @@ import scipy as sp import sympy as sm import numba as nb -TANK_RATIO = 2 -SEDIMENT_COUNT = 28000 +TANK_RATIO = 2 +SEDIMENT_COUNT = 28000*TANK_RATIO SEDIMENT_RADIUS = 0.5e-3 +DISCRETIZATION = 4096 # initialize vorticity def init_vorticity(data, coords, component=None): @@ -58,8 +59,8 @@ def init_sediment(data, coords, nblobs, rblob): assert (rblob>=dx), 'Sediment radius < dx.' assert (rblob>=dy), 'Sediment radius < dy.' - Bx = TANK_RATIO*np.random.rand(nblobs) - By = 1*np.random.rand(nblobs) + Bx = 1*np.random.rand(nblobs) + By = TANK_RATIO*np.random.rand(nblobs) Ix = np.floor(Bx/dx).astype(np.int32) Iy = np.floor(By/dy).astype(np.int32) Px = Bx - Ix*dx @@ -136,7 +137,7 @@ def compute(args): dim = args.ndim if (dim==2): Xo = (0.0,)*dim - Xn = (1.0, float(TANK_RATIO)) + Xn = (float(TANK_RATIO), 1.0) nblobs = SEDIMENT_COUNT rblob = SEDIMENT_RADIUS npts = args.npts @@ -144,14 +145,14 @@ def compute(args): msg='The {}D has not been implemented yet.'.format(dim) raise NotImplementedError(msg) - nu_S = ScalarParameter(name='nu_S', dtype=args.dtype, const=True, initial_value=1e-8) + nu_S = ScalarParameter(name='nu_S', dtype=args.dtype, const=True, initial_value=1e-10) nu_W = ScalarParameter(name='nu_W', dtype=args.dtype, const=True, initial_value=1.00) - lboundaries = (BoxBoundaryCondition.SYMMETRIC,)*dim - rboundaries = (BoxBoundaryCondition.SYMMETRIC,)*dim + lboundaries = (BoxBoundaryCondition.SYMMETRIC, BoxBoundaryCondition.SYMMETRIC) + rboundaries = (BoxBoundaryCondition.OUTFLOW, BoxBoundaryCondition.SYMMETRIC) - S_lboundaries = (BoundaryCondition.HOMOGENEOUS_NEUMANN,)*dim - S_rboundaries = (BoundaryCondition.HOMOGENEOUS_NEUMANN,)*dim + #S_lboundaries = (BoundaryCondition.HOMOGENEOUS_NEUMANN, BoundaryCondition.HOMOGENEOUS_NEUMANN) + #S_rboundaries = (BoundaryCondition.HOMOGENEOUS_NEUMANN, BoundaryCondition.HOMOGENEOUS_NEUMANN) box = Box(origin=Xo, length=np.subtract(Xn,Xo), lboundaries=lboundaries, rboundaries=rboundaries) @@ -190,7 +191,7 @@ def compute(args): t, dt = TimeParameters(dtype=args.dtype) velo = VelocityField(domain=box, dtype=args.dtype) vorti = VorticityField(velocity=velo) - S = Field(domain=box, name='S', dtype=args.dtype,) + S = Field(domain=box, name='S', dtype=args.dtype) #lboundaries=S_lboundaries, rboundaries=S_rboundaries) # Symbolic fields @@ -242,7 +243,7 @@ def compute(args): #> External force rot(-rho*g) = rot(S) Fext = np.zeros(shape=(dim,), dtype=object).view(SymbolicTensor) fext = -Ss - Fext[0] = fext + Fext[1] = fext lhs = Ws.diff(frame.time) rhs = curl(Fext, frame) exprs = Assignment.assign(lhs, rhs) @@ -298,7 +299,7 @@ def compute(args): ### Adaptive timestep operator adapt_dt = AdaptiveTimeStep(dt, equivalent_CFL=True, name='merge_dt', pretty_name='dt', - max_dt=1e0) + max_dt=1e2) dt_cfl = adapt_dt.push_cfl_criteria(cfl=args.cfl, Fmin=min_max_U.Fmin, Fmax=min_max_U.Fmax, @@ -326,7 +327,7 @@ def compute(args): problem = Problem(method=method) problem.insert(poisson, min_max_U, min_max_W, adapt_dt, - #diffuse_S, + diffuse_S, splitting, dump_fields, compute_mean_fields) @@ -351,7 +352,8 @@ def compute(args): # Initialize vorticity, velocity, S on all topologies problem.initialize_field(field=velo, formula=init_velocity) problem.initialize_field(field=vorti, formula=init_vorticity) - problem.initialize_field(field=S, formula=init_sediment, nblobs=nblobs, rblob=rblob) + problem.initialize_field(field=S, formula=init_sediment, + nblobs=nblobs, rblob=rblob, without_ghosts=True) # Finally solve the problem problem.solve(simu, dry_run=args.dry_run) @@ -390,11 +392,10 @@ if __name__=='__main__': parser = ParticleAboveSaltArgParser() parser.set_defaults(impl='cl', ndim=2, - npts=(3072+1,TANK_RATIO*3072+1), - #npts=(256+1,TANK_RATIO*256+1), + npts=(TANK_RATIO*DISCRETIZATION+1,DISCRETIZATION+1), box_origin=(0.0,), box_length=(1.0,), tstart=0.0, tend=100000.0, - dt=1e-6, cfl=1.0, lcfl=0.95, + dt=1e-6, cfl=16.0, lcfl=0.95, dump_times=tuple(float(x) for x in range(0,100000,100)), dump_freq=0) diff --git a/hysop/fields/cartesian_discrete_field.py b/hysop/fields/cartesian_discrete_field.py index 00b875575..d0570a694 100644 --- a/hysop/fields/cartesian_discrete_field.py +++ b/hysop/fields/cartesian_discrete_field.py @@ -28,8 +28,9 @@ from hysop.core.mpi.topo_tools import TopoTools class CartesianDiscreteScalarFieldViewContainerI(object): def initialize(self, formula, vectorize=False, - exchange_ghosts=True, exchange_kwds=None, - only_finite=True, reorder=None, quiet=False, **kwds): + without_ghosts=False, exchange_ghosts=True, + exchange_kwds=None, only_finite=True, + reorder=None, quiet=False, **kwds): """ Initialize the cartesian field data. @@ -47,6 +48,8 @@ class CartesianDiscreteScalarFieldViewContainerI(object): Defaults to True. exchange_ghosts: bool, optional, defaults to True Should we exchange ghosts after initialization ? + without_ghosts: boolean, optional, defaults to False + Do not initialize ghosts (only for formula init). exchange_kwds: dict, optional, Extra exchange keyword arguments passed to ghost exchange. Only used if exchange_ghosts is set to True. @@ -107,14 +110,21 @@ class CartesianDiscreteScalarFieldViewContainerI(object): vprint(msg) host_backend = self.backend.host_array_backend data = tuple(host_backend.empty(shape=d.shape, dtype=d.dtype) - for d in self.data) + for d in self.buffers) + + if without_ghosts: + vdata = tuple(buf[df.compute_slices] + for (buf,df) in zip(data, self.discrete_field_views())) + else: + vdata = data if from_formula: + # initialize from a python method assert ('data' not in kwds), 'data is a reserved keyword.' assert ('coords' not in kwds), 'coords is a reserved keyword.' coords = self.get_attributes('mesh_coords') - formula_kwds = dict(data=data, coords=coords) + formula_kwds = dict(data=vdata, coords=coords) formula_kwds.update(kwds) for kwd in reorder: vals = to_list(kwds[kwd]) @@ -171,7 +181,7 @@ class CartesianDiscreteScalarFieldViewContainerI(object): for (d0,d1) in zip(self.data, data)), 'Array shape was altered.' if only_finite: - for (i,d) in enumerate(data): + for (i,d) in enumerate(vdata): if np.isnan(d).any(): msg='Initialization of {} on component {} failed, got NaNs.' msg=msg.format(self.pretty_name, i) diff --git a/hysop/numerics/fft/fft.py b/hysop/numerics/fft/fft.py index 148cc0863..714aa10ce 100644 --- a/hysop/numerics/fft/fft.py +++ b/hysop/numerics/fft/fft.py @@ -306,11 +306,14 @@ class FFTI(object): fn = functools.partial(fn, **fkwds) return fn - def __init__(self, backend, warn_on_allocation=True): + def __init__(self, backend, + warn_on_allocation=True, + error_on_allocation=False): """Initializes the interface and default supported real and complex types.""" from hysop.core.arrays.array_backend import ArrayBackend check_instance(backend, ArrayBackend) check_instance(warn_on_allocation, bool) + check_instance(error_on_allocation, bool) self.supported_ftypes = (np.float32, np.float64) self.supported_ctypes = (np.complex64, np.complex128) @@ -318,16 +321,20 @@ class FFTI(object): self.supported_sine_transforms = (1,2,3) self.backend = backend - self.warn_on_allocation = warn_on_allocation + self.warn_on_allocation = warn_on_allocation + self.error_on_allocation = error_on_allocation def allocate_output(self, out, shape, dtype): """Alocate output if required and check shape and dtype.""" if (out is None): - if self.warn_on_allocation: + if self.warn_on_allocation or self.error_on_allocation: nbytes = np.prod(shape, dtype=np.int64)*dtype.itemsize msg='FftwFFT: allocating aligned output array of size {}.' msg=msg.format(bytes2str(nbytes)) - warnings.warn(msg, HysopFFTWarning) + if self.error_on_allocation: + raise RuntimeError(msg) + else: + warnings.warn(msg, HysopFFTWarning) out = self.backend.empty(shape=shape, dtype=dtype) else: assert out.dtype == dtype @@ -341,16 +348,24 @@ class FFTI(object): msg=msg.format(cls.__name__) raise NotImplementedError(msg) - def allocate_plans(self, plans): + def allocate_plans(self, op, plans, tmp_buffer=None): """Allocate and share a buffer on given backend to a group of plans.""" backend = self.backend tmp_size = max(plan.required_buffer_size for plan in plans) if (tmp_size>0): - msg='Allocating an additional {} temporary buffer for FFT backend {}.' - msg=msg.format(bytes2str(tmp_size), self.__class__.__name__) - vprint(msg) - tmp_buffer = backend.empty(shape=(tmp_size), dtype=np.uint8) + msg='Operator {}: Allocating an additional {} temporary buffer for FFT backend {}.' + msg=msg.format(op.pretty_name, bytes2str(tmp_size), self.__class__.__name__) + if (tmp_buffer is not None): + assert tmp_buffer.nbytes >= tmp_size + else: + if self.error_on_allocation: + raise RuntimeError(msg) + elif self.warn_on_allocation: + warnings.warn(msg, HysopGpyFftWarning) + else: + vprint(msg) + tmp_buffer = backend.empty(shape=(tmp_size), dtype=np.uint8) for plan in plans: if (plan.required_buffer_size > tmp_buffer.nbytes): msg='\nFATAL ERROR: Failed to allocate temporary buffer for clFFT.' diff --git a/hysop/numerics/fft/gpyfft_fft.py b/hysop/numerics/fft/gpyfft_fft.py index d616bce60..c52a56daf 100644 --- a/hysop/numerics/fft/gpyfft_fft.py +++ b/hysop/numerics/fft/gpyfft_fft.py @@ -48,8 +48,9 @@ class GpyFFTPlan(OpenClFFTPlanI): callback_kwds=None, direction_forward=True, hardcode_twiddles=True, + warn_on_unaligned_output_offset=True, warn_on_allocation=True, - warn_on_unaligned_output_offset=True): + error_on_allocation=False): """ Wrap gpyfft.FFT to allow more versatile callback settings and buffer allocations. @@ -86,11 +87,13 @@ class GpyFFTPlan(OpenClFFTPlanI): in the opencl code. Only used by DCT-II, DCT-III, DST-II and DST-III. If set to False, the twiddles will be computed by the device on the fly, freeing device __constant memory banks. - warn_on_allocation: bool, optional, defaults to True - Emit a warning if the planner has to allocate opencl buffers. warn_on_unaligned_output_offset: bool, optional, defaults to True Emit a warning if the planner encounter an output array that has a non zero offset. + warn_on_allocation: bool, optional, defaults to True + Emit a warning if the planner has to allocate opencl buffers. + error_on_allocation: bool, optional, defaults to False + Raise a RuntimeError if the planner has to allocate opencl buffers. """ super(GpyFFTPlan, self).__init__() @@ -103,8 +106,9 @@ class GpyFFTPlan(OpenClFFTPlanI): callback_kwds = first_not_None(callback_kwds, {}) self.cl_env = cl_env - self.warn_on_allocation = warn_on_allocation self.warn_on_unaligned_output_offset = warn_on_unaligned_output_offset + self.warn_on_allocation = warn_on_allocation + self.error_on_allocation = error_on_allocation self.temp_buffer = None self._setup = False @@ -649,10 +653,13 @@ Post callback source code: size = self.plan.temp_array_size if (size>0): if (buf is None): - if self.warn_on_allocation: + if self.warn_on_allocation or self.error_on_allocation: msg='Allocating temporary buffer of size {} for clFFT::{}.' msg=msg.format(bytes2str(size), id(self)) - warnings.warn(msg, HysopGpyFftWarning) + if self.error_on_allocation: + raise RuntimeError(msg) + else: + warnings.warn(msg, HysopGpyFftWarning) buf = cl.Buffer(self.context, cl.mem_flags.READ_WRITE, size=size) self.temp_buffer = buf elif (buf.size != size): @@ -1271,11 +1278,13 @@ class GpyFFT(OpenClFFTI): backend=None, allocator=None, warn_on_allocation=True, warn_on_unaligned_output_offset=True, + error_on_allocation=False, **kwds): super(GpyFFT, self).__init__(cl_env=cl_env, backend=backend, allocator=allocator, - warn_on_allocation=warn_on_allocation, **kwds) + warn_on_allocation=warn_on_allocation, + error_on_allocation=error_on_allocation, **kwds) self.supported_ftypes = (np.float32, np.float64) self.supported_ctypes = (np.complex64, np.complex128) @@ -1286,11 +1295,14 @@ class GpyFFT(OpenClFFTI): def allocate_output(self, out, shape, dtype): """Alocate output if required and check shape and dtype.""" if (out is None): - if self.warn_on_allocation: + if self.warn_on_allocation or self.error_on_allocation: nbytes = prod(shape)*dtype.itemsize msg='GpyFFT: allocating output array of size {}.' msg=msg.format(bytes2str(nbytes)) - warnings.warn(msg, HysopGpyFftWarning) + if self.error_on_allocation: + raise RuntimeError(msg) + else: + warnings.warn(msg, HysopGpyFftWarning) out = self.backend.empty(shape=shape, dtype=dtype) else: assert out.dtype == dtype @@ -1299,14 +1311,14 @@ class GpyFFT(OpenClFFTI): def bake_kwds(self, **kwds): plan_kwds = {} - plan_kwds['in_array'] = kwds.pop('a') - plan_kwds['out_array'] = kwds.pop('out') - plan_kwds['scaling'] = kwds.pop('scaling', None) - plan_kwds['scale_by_size'] = kwds.pop('scale_by_size', None) - plan_kwds['axes'] = kwds.pop('axes', (kwds.pop('axis'),)) - plan_kwds['cl_env'] = kwds.pop('cl_env', self.cl_env) - plan_kwds['warn_on_allocation'] = kwds.pop('warn_on_allocation', - self.warn_on_allocation) + plan_kwds['in_array'] = kwds.pop('a') + plan_kwds['out_array'] = kwds.pop('out') + plan_kwds['scaling'] = kwds.pop('scaling', None) + plan_kwds['scale_by_size'] = kwds.pop('scale_by_size', None) + plan_kwds['axes'] = kwds.pop('axes', (kwds.pop('axis'),)) + plan_kwds['cl_env'] = kwds.pop('cl_env', self.cl_env) + plan_kwds['warn_on_allocation'] = kwds.pop('warn_on_allocation', self.warn_on_allocation) + plan_kwds['error_on_allocation'] = kwds.pop('error_on_allocation', self.error_on_allocation) plan_kwds['warn_on_unaligned_output_offset'] = \ kwds.pop('warn_on_unaligned_output_offset', self.warn_on_unaligned_output_offset) diff --git a/hysop/numerics/fft/host_fft.py b/hysop/numerics/fft/host_fft.py index c5a67d927..9ff82cae8 100644 --- a/hysop/numerics/fft/host_fft.py +++ b/hysop/numerics/fft/host_fft.py @@ -62,6 +62,7 @@ class HostFFTI(FFTI): destroy_input=False, warn_on_allocation=True, warn_on_misalignment=True, + error_on_allocation=False, **kwds): """ Get the default host FFT interface which is a multithreaded FFTW interface with @@ -78,6 +79,7 @@ class HostFFTI(FFTI): destroy_input=destroy_input, warn_on_allocation=warn_on_allocation, warn_on_misalignment=warn_on_misalignment, + error_on_allocation=error_on_allocation, **kwds) def new_queue(self, tg, name): diff --git a/hysop/numerics/fft/opencl_fft.py b/hysop/numerics/fft/opencl_fft.py index f7baef554..b67149686 100644 --- a/hysop/numerics/fft/opencl_fft.py +++ b/hysop/numerics/fft/opencl_fft.py @@ -80,7 +80,8 @@ class OpenClFFTI(FFTI): @classmethod def default_interface(cls, cl_env, backend=None, allocator=None, - warn_on_allocation=True, + warn_on_allocation=False, + error_on_allocation=True, warn_on_unaligned_output_offset=True, **kwds): """Get the default OpenCl FFT interface which is a GpyFFT interface.""" @@ -88,6 +89,7 @@ class OpenClFFTI(FFTI): return GpyFFT(cl_env=cl_env, backend=backend, allocator=allocator, warn_on_allocation=warn_on_allocation, + error_on_allocation=error_on_allocation, warn_on_unaligned_output_offset=warn_on_unaligned_output_offset, **kwds) diff --git a/hysop/operator/base/spectral_operator.py b/hysop/operator/base/spectral_operator.py index b79db00b6..7f5226fb9 100644 --- a/hysop/operator/base/spectral_operator.py +++ b/hysop/operator/base/spectral_operator.py @@ -1756,7 +1756,7 @@ SPECTRAL TRANSFORM SETUP 'post-transform-callback') # allocate fft plans - FFTI.allocate_plans(fft_plans) + FFTI.allocate_plans(op, fft_plans) self._queue = queue self._ready = True -- GitLab