From 7f27e04d6f3fa1c98c95fb695a468e45c3dc9ae8 Mon Sep 17 00:00:00 2001
From: Jean-Baptiste Keck <Jean-Baptiste.Keck@imag.fr>
Date: Sat, 8 Dec 2018 15:41:28 +0100
Subject: [PATCH] added error_on_allocation flag for fft backends

---
 examples/sediment_deposit/sediment_deposit.py | 37 +++++++--------
 hysop/fields/cartesian_discrete_field.py      | 20 ++++++--
 hysop/numerics/fft/fft.py                     | 33 +++++++++----
 hysop/numerics/fft/gpyfft_fft.py              | 46 ++++++++++++-------
 hysop/numerics/fft/host_fft.py                |  2 +
 hysop/numerics/fft/opencl_fft.py              |  4 +-
 hysop/operator/base/spectral_operator.py      |  2 +-
 7 files changed, 93 insertions(+), 51 deletions(-)

diff --git a/examples/sediment_deposit/sediment_deposit.py b/examples/sediment_deposit/sediment_deposit.py
index bbc005220..848f03001 100644
--- a/examples/sediment_deposit/sediment_deposit.py
+++ b/examples/sediment_deposit/sediment_deposit.py
@@ -3,9 +3,10 @@ import scipy as sp
 import sympy as sm
 import numba as nb
 
-TANK_RATIO = 2
-SEDIMENT_COUNT  = 28000
+TANK_RATIO      = 2
+SEDIMENT_COUNT  = 28000*TANK_RATIO
 SEDIMENT_RADIUS = 0.5e-3
+DISCRETIZATION  = 4096
 
 # initialize vorticity
 def init_vorticity(data, coords, component=None):
@@ -58,8 +59,8 @@ def init_sediment(data, coords, nblobs, rblob):
         assert (rblob>=dx), 'Sediment radius < dx.'
         assert (rblob>=dy), 'Sediment radius < dy.'
         
-        Bx = TANK_RATIO*np.random.rand(nblobs)
-        By = 1*np.random.rand(nblobs)
+        Bx = 1*np.random.rand(nblobs)
+        By = TANK_RATIO*np.random.rand(nblobs)
         Ix = np.floor(Bx/dx).astype(np.int32)
         Iy = np.floor(By/dy).astype(np.int32)
         Px = Bx - Ix*dx
@@ -136,7 +137,7 @@ def compute(args):
     dim = args.ndim
     if (dim==2):
         Xo = (0.0,)*dim
-        Xn = (1.0, float(TANK_RATIO))
+        Xn = (float(TANK_RATIO), 1.0)
         nblobs = SEDIMENT_COUNT
         rblob  = SEDIMENT_RADIUS
         npts = args.npts
@@ -144,14 +145,14 @@ def compute(args):
         msg='The {}D has not been implemented yet.'.format(dim)
         raise NotImplementedError(msg)
 
-    nu_S = ScalarParameter(name='nu_S', dtype=args.dtype, const=True, initial_value=1e-8)
+    nu_S = ScalarParameter(name='nu_S', dtype=args.dtype, const=True, initial_value=1e-10)
     nu_W = ScalarParameter(name='nu_W', dtype=args.dtype, const=True, initial_value=1.00)
 
-    lboundaries = (BoxBoundaryCondition.SYMMETRIC,)*dim
-    rboundaries = (BoxBoundaryCondition.SYMMETRIC,)*dim
+    lboundaries = (BoxBoundaryCondition.SYMMETRIC, BoxBoundaryCondition.SYMMETRIC)
+    rboundaries = (BoxBoundaryCondition.OUTFLOW,   BoxBoundaryCondition.SYMMETRIC)
 
-    S_lboundaries = (BoundaryCondition.HOMOGENEOUS_NEUMANN,)*dim
-    S_rboundaries = (BoundaryCondition.HOMOGENEOUS_NEUMANN,)*dim
+    #S_lboundaries = (BoundaryCondition.HOMOGENEOUS_NEUMANN, BoundaryCondition.HOMOGENEOUS_NEUMANN)
+    #S_rboundaries = (BoundaryCondition.HOMOGENEOUS_NEUMANN, BoundaryCondition.HOMOGENEOUS_NEUMANN)
 
     box = Box(origin=Xo, length=np.subtract(Xn,Xo),
                 lboundaries=lboundaries, rboundaries=rboundaries)
@@ -190,7 +191,7 @@ def compute(args):
     t, dt = TimeParameters(dtype=args.dtype)
     velo  = VelocityField(domain=box, dtype=args.dtype)
     vorti = VorticityField(velocity=velo)
-    S = Field(domain=box, name='S', dtype=args.dtype,)
+    S = Field(domain=box, name='S', dtype=args.dtype)
             #lboundaries=S_lboundaries, rboundaries=S_rboundaries)
     
     # Symbolic fields
@@ -242,7 +243,7 @@ def compute(args):
     #> External force rot(-rho*g) = rot(S)
     Fext = np.zeros(shape=(dim,), dtype=object).view(SymbolicTensor)
     fext = -Ss
-    Fext[0] = fext
+    Fext[1] = fext
     lhs = Ws.diff(frame.time)
     rhs = curl(Fext, frame)
     exprs = Assignment.assign(lhs, rhs)
@@ -298,7 +299,7 @@ def compute(args):
     ### Adaptive timestep operator
     adapt_dt = AdaptiveTimeStep(dt, equivalent_CFL=True,
                                     name='merge_dt', pretty_name='dt',
-                                    max_dt=1e0)
+                                    max_dt=1e2)
     dt_cfl = adapt_dt.push_cfl_criteria(cfl=args.cfl, 
                                         Fmin=min_max_U.Fmin,
                                         Fmax=min_max_U.Fmax,
@@ -326,7 +327,7 @@ def compute(args):
     problem = Problem(method=method)
     problem.insert(poisson, 
                    min_max_U, min_max_W, adapt_dt,
-                   #diffuse_S,
+                   diffuse_S,
                    splitting, 
                    dump_fields,
                    compute_mean_fields)
@@ -351,7 +352,8 @@ def compute(args):
     # Initialize vorticity, velocity, S on all topologies
     problem.initialize_field(field=velo,  formula=init_velocity)
     problem.initialize_field(field=vorti, formula=init_vorticity)
-    problem.initialize_field(field=S,     formula=init_sediment, nblobs=nblobs, rblob=rblob)
+    problem.initialize_field(field=S,     formula=init_sediment, 
+            nblobs=nblobs, rblob=rblob, without_ghosts=True)
     
     # Finally solve the problem 
     problem.solve(simu, dry_run=args.dry_run)
@@ -390,11 +392,10 @@ if __name__=='__main__':
     parser = ParticleAboveSaltArgParser()
 
     parser.set_defaults(impl='cl', ndim=2, 
-                        npts=(3072+1,TANK_RATIO*3072+1),
-                        #npts=(256+1,TANK_RATIO*256+1),
+                        npts=(TANK_RATIO*DISCRETIZATION+1,DISCRETIZATION+1),
                         box_origin=(0.0,), box_length=(1.0,), 
                         tstart=0.0, tend=100000.0, 
-                        dt=1e-6, cfl=1.0, lcfl=0.95,
+                        dt=1e-6, cfl=16.0, lcfl=0.95,
                         dump_times=tuple(float(x) for x in range(0,100000,100)),
                         dump_freq=0)
 
diff --git a/hysop/fields/cartesian_discrete_field.py b/hysop/fields/cartesian_discrete_field.py
index 00b875575..d0570a694 100644
--- a/hysop/fields/cartesian_discrete_field.py
+++ b/hysop/fields/cartesian_discrete_field.py
@@ -28,8 +28,9 @@ from hysop.core.mpi.topo_tools import TopoTools
 
 class CartesianDiscreteScalarFieldViewContainerI(object):
     def initialize(self, formula, vectorize=False, 
-            exchange_ghosts=True, exchange_kwds=None,
-            only_finite=True, reorder=None, quiet=False, **kwds):
+            without_ghosts=False, exchange_ghosts=True,
+            exchange_kwds=None, only_finite=True,
+            reorder=None, quiet=False, **kwds):
         """
         Initialize the cartesian field data.
 
@@ -47,6 +48,8 @@ class CartesianDiscreteScalarFieldViewContainerI(object):
             Defaults to True.
         exchange_ghosts: bool, optional, defaults to True
             Should we exchange ghosts after initialization ?
+        without_ghosts: boolean, optional, defaults to False
+            Do not initialize ghosts (only for formula init).
         exchange_kwds: dict, optional,
             Extra exchange keyword arguments passed to ghost exchange.
             Only used if exchange_ghosts is set to True.
@@ -107,14 +110,21 @@ class CartesianDiscreteScalarFieldViewContainerI(object):
                 vprint(msg)
             host_backend = self.backend.host_array_backend
             data = tuple(host_backend.empty(shape=d.shape, dtype=d.dtype) 
-                         for d in self.data)
+                         for d in self.buffers)
+        
+        if without_ghosts:
+            vdata = tuple(buf[df.compute_slices]
+                          for (buf,df) in zip(data, self.discrete_field_views()))
+        else:
+            vdata = data
 
         if from_formula:
+
             # initialize from a python method
             assert ('data'   not in kwds), 'data is a reserved keyword.'
             assert ('coords' not in kwds), 'coords is a reserved keyword.'
             coords = self.get_attributes('mesh_coords')
-            formula_kwds = dict(data=data, coords=coords)
+            formula_kwds = dict(data=vdata, coords=coords)
             formula_kwds.update(kwds)
             for kwd in reorder:
                 vals = to_list(kwds[kwd])
@@ -171,7 +181,7 @@ class CartesianDiscreteScalarFieldViewContainerI(object):
                     for (d0,d1) in zip(self.data, data)), 'Array shape was altered.'
 
         if only_finite:
-            for (i,d) in enumerate(data):
+            for (i,d) in enumerate(vdata):
                 if np.isnan(d).any():
                     msg='Initialization of {} on component {} failed, got NaNs.'
                     msg=msg.format(self.pretty_name, i)
diff --git a/hysop/numerics/fft/fft.py b/hysop/numerics/fft/fft.py
index 148cc0863..714aa10ce 100644
--- a/hysop/numerics/fft/fft.py
+++ b/hysop/numerics/fft/fft.py
@@ -306,11 +306,14 @@ class FFTI(object):
             fn = functools.partial(fn, **fkwds)
         return fn
 
-    def __init__(self, backend, warn_on_allocation=True):
+    def __init__(self, backend, 
+            warn_on_allocation=True,
+            error_on_allocation=False):
         """Initializes the interface and default supported real and complex types."""
         from hysop.core.arrays.array_backend import ArrayBackend
         check_instance(backend, ArrayBackend)
         check_instance(warn_on_allocation, bool)
+        check_instance(error_on_allocation, bool)
 
         self.supported_ftypes = (np.float32, np.float64)
         self.supported_ctypes = (np.complex64, np.complex128)
@@ -318,16 +321,20 @@ class FFTI(object):
         self.supported_sine_transforms   = (1,2,3)
 
         self.backend = backend
-        self.warn_on_allocation = warn_on_allocation
+        self.warn_on_allocation  = warn_on_allocation
+        self.error_on_allocation = error_on_allocation
    
     def allocate_output(self, out, shape, dtype):
         """Alocate output if required and check shape and dtype."""
         if (out is None):
-            if self.warn_on_allocation:
+            if self.warn_on_allocation or self.error_on_allocation:
                 nbytes = np.prod(shape, dtype=np.int64)*dtype.itemsize
                 msg='FftwFFT: allocating aligned output array of size {}.'
                 msg=msg.format(bytes2str(nbytes))
-                warnings.warn(msg, HysopFFTWarning)
+                if self.error_on_allocation:
+                    raise RuntimeError(msg)
+                else:
+                    warnings.warn(msg, HysopFFTWarning)
             out = self.backend.empty(shape=shape, dtype=dtype)
         else:
             assert out.dtype == dtype
@@ -341,16 +348,24 @@ class FFTI(object):
         msg=msg.format(cls.__name__)
         raise NotImplementedError(msg)
 
-    def allocate_plans(self, plans):
+    def allocate_plans(self, op, plans, tmp_buffer=None):
         """Allocate and share a buffer on given backend to a group of plans."""
         backend = self.backend
         tmp_size = max(plan.required_buffer_size for plan in plans)
 
         if (tmp_size>0):
-            msg='Allocating an additional {} temporary buffer for FFT backend {}.'
-            msg=msg.format(bytes2str(tmp_size), self.__class__.__name__)
-            vprint(msg)
-            tmp_buffer = backend.empty(shape=(tmp_size), dtype=np.uint8)
+            msg='Operator {}: Allocating an additional {} temporary buffer for FFT backend {}.'
+            msg=msg.format(op.pretty_name, bytes2str(tmp_size), self.__class__.__name__)
+            if (tmp_buffer is not None):
+                assert tmp_buffer.nbytes >= tmp_size
+            else:
+                if self.error_on_allocation:
+                    raise RuntimeError(msg)
+                elif self.warn_on_allocation:
+                    warnings.warn(msg, HysopGpyFftWarning)
+                else:
+                    vprint(msg)
+                tmp_buffer = backend.empty(shape=(tmp_size), dtype=np.uint8)
             for plan in plans:
                 if (plan.required_buffer_size > tmp_buffer.nbytes):
                     msg='\nFATAL ERROR: Failed to allocate temporary buffer for clFFT.'
diff --git a/hysop/numerics/fft/gpyfft_fft.py b/hysop/numerics/fft/gpyfft_fft.py
index d616bce60..c52a56daf 100644
--- a/hysop/numerics/fft/gpyfft_fft.py
+++ b/hysop/numerics/fft/gpyfft_fft.py
@@ -48,8 +48,9 @@ class GpyFFTPlan(OpenClFFTPlanI):
             callback_kwds=None,
             direction_forward=True,
             hardcode_twiddles=True,
+            warn_on_unaligned_output_offset=True,
             warn_on_allocation=True,
-            warn_on_unaligned_output_offset=True):
+            error_on_allocation=False):
         """
         Wrap gpyfft.FFT to allow more versatile callback settings
         and buffer allocations.
@@ -86,11 +87,13 @@ class GpyFFTPlan(OpenClFFTPlanI):
             in the opencl code. Only used by DCT-II, DCT-III, DST-II and DST-III. 
             If set to False, the twiddles will be computed by the device on the 
             fly, freeing device __constant memory banks.
-        warn_on_allocation: bool, optional, defaults to True
-            Emit a warning if the planner has to allocate opencl buffers.
         warn_on_unaligned_output_offset: bool, optional, defaults to True
             Emit a warning if the planner encounter an output array that has
             a non zero offset.
+        warn_on_allocation: bool, optional, defaults to True
+            Emit a warning if the planner has to allocate opencl buffers.
+        error_on_allocation: bool, optional, defaults to False
+            Raise a RuntimeError if the planner has to allocate opencl buffers.
         """
         super(GpyFFTPlan, self).__init__()
 
@@ -103,8 +106,9 @@ class GpyFFTPlan(OpenClFFTPlanI):
         callback_kwds = first_not_None(callback_kwds, {})
 
         self.cl_env  = cl_env
-        self.warn_on_allocation = warn_on_allocation
         self.warn_on_unaligned_output_offset = warn_on_unaligned_output_offset
+        self.warn_on_allocation  = warn_on_allocation
+        self.error_on_allocation = error_on_allocation
 
         self.temp_buffer = None
         self._setup      = False
@@ -649,10 +653,13 @@ Post callback source code:
         size = self.plan.temp_array_size
         if (size>0):
             if (buf is None):
-                if self.warn_on_allocation:
+                if self.warn_on_allocation or self.error_on_allocation:
                     msg='Allocating temporary buffer of size {} for clFFT::{}.'
                     msg=msg.format(bytes2str(size), id(self))
-                    warnings.warn(msg, HysopGpyFftWarning)
+                    if self.error_on_allocation:
+                        raise RuntimeError(msg)
+                    else:
+                        warnings.warn(msg, HysopGpyFftWarning)
                 buf = cl.Buffer(self.context, cl.mem_flags.READ_WRITE, size=size)
                 self.temp_buffer = buf
             elif (buf.size != size):
@@ -1271,11 +1278,13 @@ class GpyFFT(OpenClFFTI):
                        backend=None, allocator=None,
                        warn_on_allocation=True,
                        warn_on_unaligned_output_offset=True, 
+                       error_on_allocation=False,
                        **kwds):
 
         super(GpyFFT, self).__init__(cl_env=cl_env, 
                 backend=backend, allocator=allocator, 
-                warn_on_allocation=warn_on_allocation, **kwds)
+                warn_on_allocation=warn_on_allocation, 
+                error_on_allocation=error_on_allocation, **kwds)
 
         self.supported_ftypes = (np.float32, np.float64)
         self.supported_ctypes = (np.complex64, np.complex128)
@@ -1286,11 +1295,14 @@ class GpyFFT(OpenClFFTI):
     def allocate_output(self, out, shape, dtype):
         """Alocate output if required and check shape and dtype."""
         if (out is None):
-            if self.warn_on_allocation:
+            if self.warn_on_allocation or self.error_on_allocation:
                 nbytes = prod(shape)*dtype.itemsize
                 msg='GpyFFT: allocating output array of size {}.'
                 msg=msg.format(bytes2str(nbytes))
-                warnings.warn(msg, HysopGpyFftWarning)
+                if self.error_on_allocation:
+                    raise RuntimeError(msg)
+                else:
+                    warnings.warn(msg, HysopGpyFftWarning)
             out = self.backend.empty(shape=shape, dtype=dtype)
         else:
             assert out.dtype == dtype
@@ -1299,14 +1311,14 @@ class GpyFFT(OpenClFFTI):
 
     def bake_kwds(self, **kwds):
         plan_kwds = {}
-        plan_kwds['in_array']           = kwds.pop('a')
-        plan_kwds['out_array']          = kwds.pop('out')
-        plan_kwds['scaling']            = kwds.pop('scaling', None)
-        plan_kwds['scale_by_size']      = kwds.pop('scale_by_size', None)
-        plan_kwds['axes']               = kwds.pop('axes', (kwds.pop('axis'),))
-        plan_kwds['cl_env']             = kwds.pop('cl_env',  self.cl_env)
-        plan_kwds['warn_on_allocation'] = kwds.pop('warn_on_allocation', 
-                self.warn_on_allocation)
+        plan_kwds['in_array']            = kwds.pop('a')
+        plan_kwds['out_array']           = kwds.pop('out')
+        plan_kwds['scaling']             = kwds.pop('scaling', None)
+        plan_kwds['scale_by_size']       = kwds.pop('scale_by_size', None)
+        plan_kwds['axes']                = kwds.pop('axes', (kwds.pop('axis'),))
+        plan_kwds['cl_env']              = kwds.pop('cl_env',  self.cl_env)
+        plan_kwds['warn_on_allocation']  = kwds.pop('warn_on_allocation',  self.warn_on_allocation)
+        plan_kwds['error_on_allocation'] = kwds.pop('error_on_allocation', self.error_on_allocation)
         plan_kwds['warn_on_unaligned_output_offset'] = \
                 kwds.pop('warn_on_unaligned_output_offset', 
                         self.warn_on_unaligned_output_offset)
diff --git a/hysop/numerics/fft/host_fft.py b/hysop/numerics/fft/host_fft.py
index c5a67d927..9ff82cae8 100644
--- a/hysop/numerics/fft/host_fft.py
+++ b/hysop/numerics/fft/host_fft.py
@@ -62,6 +62,7 @@ class HostFFTI(FFTI):
                        destroy_input=False,
                        warn_on_allocation=True,
                        warn_on_misalignment=True,
+                       error_on_allocation=False,
                        **kwds):
         """
         Get the default host FFT interface which is a multithreaded FFTW interface with 
@@ -78,6 +79,7 @@ class HostFFTI(FFTI):
                        destroy_input=destroy_input,
                        warn_on_allocation=warn_on_allocation,
                        warn_on_misalignment=warn_on_misalignment,
+                       error_on_allocation=error_on_allocation,
                        **kwds)
     
     def new_queue(self, tg, name):
diff --git a/hysop/numerics/fft/opencl_fft.py b/hysop/numerics/fft/opencl_fft.py
index f7baef554..b67149686 100644
--- a/hysop/numerics/fft/opencl_fft.py
+++ b/hysop/numerics/fft/opencl_fft.py
@@ -80,7 +80,8 @@ class OpenClFFTI(FFTI):
     @classmethod
     def default_interface(cls, cl_env,
                        backend=None, allocator=None,
-                       warn_on_allocation=True,
+                       warn_on_allocation=False,
+                       error_on_allocation=True,
                        warn_on_unaligned_output_offset=True,
                        **kwds):
         """Get the default OpenCl FFT interface which is a GpyFFT interface."""
@@ -88,6 +89,7 @@ class OpenClFFTI(FFTI):
         return GpyFFT(cl_env=cl_env,
                       backend=backend, allocator=allocator,
                       warn_on_allocation=warn_on_allocation,
+                      error_on_allocation=error_on_allocation,
                       warn_on_unaligned_output_offset=warn_on_unaligned_output_offset,
                       **kwds)
 
diff --git a/hysop/operator/base/spectral_operator.py b/hysop/operator/base/spectral_operator.py
index b79db00b6..7f5226fb9 100644
--- a/hysop/operator/base/spectral_operator.py
+++ b/hysop/operator/base/spectral_operator.py
@@ -1756,7 +1756,7 @@ SPECTRAL TRANSFORM SETUP
                          'post-transform-callback')
 
         # allocate fft plans
-        FFTI.allocate_plans(fft_plans)
+        FFTI.allocate_plans(op, fft_plans)
 
         self._queue = queue
         self._ready = True
-- 
GitLab