From 4d4432eff20984a33280fa0d9ebdf7c4c770a2cb Mon Sep 17 00:00:00 2001
From: Jean-Baptiste Keck <Jean-Baptiste.Keck@imag.fr>
Date: Sun, 1 Mar 2020 20:33:07 +0100
Subject: [PATCH] fix spectral poisson curl FFTW, disabled numba in host fft
 planner, reenabled more tests for spectral solvers, moved hptt methods in
 tools

---
 .../host/fortran/operator/fortran_fftw.py     | 14 +++----
 hysop/fields/tests/test_cartesian.py          |  6 +--
 hysop/fields/tests/test_cartesian.sh          |  2 +-
 hysop/numerics/fft/host_fft.py                | 41 ++++---------------
 hysop/operator/tests/test_diffusion.py        |  6 +--
 .../tests/test_spectral_derivative.py         | 17 ++++----
 hysop/tools/hptt_utils.py                     | 38 +++++++++++++++++
 7 files changed, 68 insertions(+), 56 deletions(-)
 create mode 100644 hysop/tools/hptt_utils.py

diff --git a/hysop/backend/host/fortran/operator/fortran_fftw.py b/hysop/backend/host/fortran/operator/fortran_fftw.py
index e0efc911a..f71182e3c 100644
--- a/hysop/backend/host/fortran/operator/fortran_fftw.py
+++ b/hysop/backend/host/fortran/operator/fortran_fftw.py
@@ -45,10 +45,9 @@ class FortranFFTWOperator(FortranOperator):
                        values=CartesianTopologyDescriptors)
 
         nb_components =input_fields.keys()[0].nb_components
-        for f in set(input_fields.keys()+output_fields.keys()):
-            assert f.nb_components == nb_components, \
-                "Number of components mismatch."
-            for fi in f.fields:
+        tensor_fields = set(input_fields.keys()+output_fields.keys())
+        for tf in tensor_fields:
+            for fi in tf.fields:
                 if (fi.dtype != HYSOP_REAL):
                     msg='FortranFFTW operators only work with HYSOP_REAL precision specified during hysop '
                     msg+='build.'
@@ -63,10 +62,9 @@ class FortranFFTWOperator(FortranOperator):
                     msg+='\n  lboundaries: {}'.format(fi.lboundaries)
                     msg+='\n  rboundaries: {}'.format(fi.rboundaries)
                     raise RuntimeError(msg)
-
-        self._scalar_3d = False
-        if nb_components == 1:
-            self._scalar_3d = True
+    
+        # Special case: 3D diffusion of a scalar 
+        self._scalar_3d = (nb_components == 1) and all(tf.nb_components==1 for tf in tensor_fields)
 
         domain = self.input_fields.keys()[0].domain
         self.dim      = domain.dim
diff --git a/hysop/fields/tests/test_cartesian.py b/hysop/fields/tests/test_cartesian.py
index 7b178728b..43a947fd7 100644
--- a/hysop/fields/tests/test_cartesian.py
+++ b/hysop/fields/tests/test_cartesian.py
@@ -497,7 +497,7 @@ def test_mpi_ghost_exchange_periodic(comm=None):
         print msg
         print '*'*len(msg)
         print 'test_mpi_ghost_exchange_periodic()'.format(size)
-    for dim in xrange(1,2+__ENABLE_LONG_TESTS__):
+    for dim in xrange(1,3+__ENABLE_LONG_TESTS__):
         if rank==0:
             print('  >DIM={}'.format(dim))
 
@@ -610,7 +610,7 @@ def test_mpi_ghost_exchange_runtime(comm=None):
         print '*'*len(msg)
         print 'test_mpi_ghost_exchange_runtime()'.format(size)
 
-    for dim in xrange(1,2+__ENABLE_LONG_TESTS__):
+    for dim in xrange(1,3+__ENABLE_LONG_TESTS__):
         if rank==0:
             sys.stdout.write('>DIM={}\n'.format(dim))
 
@@ -694,7 +694,7 @@ def test_mpi_ghost_accumulate_periodic(comm=None):
               np.int16,  np.int32,  np.int64,
               np.uint16, np.uint32, np.uint64)
     assert size-1 < len(dtypes)
-    for dim in xrange(1,2+__ENABLE_LONG_TESTS__):
+    for dim in xrange(1,3+__ENABLE_LONG_TESTS__):
         if rank==0:
             print('  >DIM={}'.format(dim))
 
diff --git a/hysop/fields/tests/test_cartesian.sh b/hysop/fields/tests/test_cartesian.sh
index 1b7e6ada8..c739ed878 100755
--- a/hysop/fields/tests/test_cartesian.sh
+++ b/hysop/fields/tests/test_cartesian.sh
@@ -21,5 +21,5 @@ popd  > /dev/null
 TEST_FILE=$SCRIPT_PATH/test_cartesian.py
 
 for i in 2; do
-     mpirun -np $i python2 $TEST_FILE
+     mpirun -np $i python2.7 $TEST_FILE
 done
diff --git a/hysop/numerics/fft/host_fft.py b/hysop/numerics/fft/host_fft.py
index 3a96f03d9..ceb5c70e6 100644
--- a/hysop/numerics/fft/host_fft.py
+++ b/hysop/numerics/fft/host_fft.py
@@ -11,39 +11,18 @@ import numpy as np
 import numba as nb
 
 from hysop import __FFTW_NUM_THREADS__, __FFTW_PLANNER_EFFORT__, __FFTW_PLANNER_TIMELIMIT__, \
-                    __DEFAULT_NUMBA_TARGET__
+                  __DEFAULT_NUMBA_TARGET__
 from hysop.tools.types import first_not_None, check_instance
 from hysop.tools.numba_utils import HAS_NUMBA, bake_numba_copy, bake_numba_accumulate, bake_numba_transpose
+from hysop.tools.hptt_utils import HAS_HPTT, hptt, can_exec_hptt, array_share_data
 from hysop.tools.decorators import static_vars
 from hysop.backend.host.host_array_backend import HostArrayBackend
 from hysop.backend.host.host_array import HostArray
 from hysop.numerics.fft.fft import FFTQueueI, FFTPlanI, FFTI
 
-def can_exec_hptt(src, dst):
-    if (src is dst):
-        return False
-    if (src.dtype != dst.dtype):
-        return False
-    if src.dtype not in (np.float32, np.float64, np.complex64, np.complex128):
-        return False
-    if src.flags['C_CONTIGUOUS'] != dst.flags['C_CONTIGUOUS']:
-        return False
-    if src.flags['F_CONTIGUOUS'] != dst.flags['F_CONTIGUOUS']:
-        return False
-    if not (src.flags['C_CONTIGUOUS'] ^ src.flags['F_CONTIGUOUS']):
-        return False
-    return (src.data is not dst.data)
-
-try:
-    import hptt
-    HAS_HPTT=True
-    # required version is: https://gitlab.com/keckj/hptt
-except ImportError:
-    HAS_HPTT=False
-    import warnings
-    from hysop.tools.warning import HysopPerformanceWarning
-    msg='Failed to import HPTT module, falling back to slow numpy transpose. Required version is available at https://gitlab.com/keckj/hptt.'
-    warnings.warn(msg, HysopPerformanceWarning)
+# Currently there is a bug for non contiguous arrays
+# in numba so we disable it
+HAS_NUMBA=False
 
 class DummyEvent(object):
     @classmethod
@@ -137,9 +116,7 @@ class HostFFTI(FFTI):
         @static_vars(numba_copy=None)
         def exec_copy(src=src, dst=dst):
             src, dst = src(), dst()
-            if (src.ndim == 1):
-                dst[...] = src
-            elif HAS_HPTT and can_exec_hptt(src, dst):
+            if can_exec_hptt(src, dst):
                 hptt.tensorTransposeAndUpdate(perm=range(src.ndim),
                         alpha=1.0, A=src, beta=0.0, B=dst)
             elif HAS_NUMBA:
@@ -157,7 +134,7 @@ class HostFFTI(FFTI):
         @static_vars(numba_accumulate=None)
         def exec_accumulate(src=src, dst=dst):
             src, dst = src(), dst()
-            if HAS_HPTT and can_exec_hptt(src, dst):
+            if can_exec_hptt(src, dst):
                 hptt.tensorTransposeAndUpdate(perm=range(src.ndim),
                         alpha=1.0, A=src, beta=1.0, B=dst)
             elif HAS_NUMBA:
@@ -166,7 +143,7 @@ class HostFFTI(FFTI):
                 exec_accumulate.numba_accumulate()
             else:
                 dst[...] += src
-        return exec_copy
+        return exec_accumulate
 
     def plan_transpose(self, tg, src, dst, axes):
         src = self.ensure_callable(src)
@@ -175,7 +152,7 @@ class HostFFTI(FFTI):
         @static_vars(numba_transpose=None)
         def exec_transpose(src=src, dst=dst, axes=axes):
             src, dst = src(), dst()
-            if HAS_HPTT and can_exec_hptt(src, dst):
+            if can_exec_hptt(src, dst):
                 hptt.tensorTransposeAndUpdate(perm=axes,
                         alpha=1.0, A=src, beta=0.0, B=dst)
             elif HAS_NUMBA:
diff --git a/hysop/operator/tests/test_diffusion.py b/hysop/operator/tests/test_diffusion.py
index d326ddbb3..773801b8b 100644
--- a/hysop/operator/tests/test_diffusion.py
+++ b/hysop/operator/tests/test_diffusion.py
@@ -267,14 +267,12 @@ class TestDiffusionOperator(object):
     def test_diffusion_2D_inplace(self):
         self._test(dim=2, is_inplace=True)
     def test_diffusion_3D_inplace(self):
-        if __ENABLE_LONG_TESTS__:
-            self._test(dim=3, is_inplace=True)
+        self._test(dim=3, is_inplace=True)
 
     def perform_tests(self):
         self.test_diffusion_1D_inplace()
         self.test_diffusion_2D_inplace()
-        if __ENABLE_LONG_TESTS__:
-            self.test_diffusion_3D_inplace()
+        self.test_diffusion_3D_inplace()
         print
 
 
diff --git a/hysop/operator/tests/test_spectral_derivative.py b/hysop/operator/tests/test_spectral_derivative.py
index 2bbe4e45c..f69e7df2a 100644
--- a/hysop/operator/tests/test_spectral_derivative.py
+++ b/hysop/operator/tests/test_spectral_derivative.py
@@ -117,7 +117,8 @@ class TestSpectralDerivative(object):
         domain_boundaries = list(domain_boundary_iterator(dim=dim))
         periodic = domain_boundaries[0]
         domain_boundaries = domain_boundaries[1:]
-        random.shuffle(domain_boundaries)
+        if (max_runs is not None):
+            random.shuffle(domain_boundaries)
         domain_boundaries.insert(0, periodic)
 
         for i, (lboundaries, rboundaries) in enumerate(domain_boundaries, 1):
@@ -213,7 +214,6 @@ class TestSpectralDerivative(object):
                         op = SpectralSpaceDerivative(cl_env=cl_env, **base_kwds)
                         yield op.to_graph()
                         print
-                    print
                 else:
                     msg='Unknown implementation to test {}.'.format(impl)
                     raise NotImplementedError(msg)
@@ -270,7 +270,11 @@ class TestSpectralDerivative(object):
                     eps  = npw.finfo(fout.dtype).eps
                     dist = npw.abs(fout-fref)
                     dinf = npw.max(dist)
-                    deps = int(dinf/eps)
+                    try:
+                        deps = int(dinf/eps)
+                    except OverflowError:
+                        import numpy as np
+                        deps = np.inf
                     if (deps <= 10**(nidx+2)):
                         if (j==1):
                             print '{}eps ({})'.format(deps, dinf),
@@ -341,12 +345,9 @@ class TestSpectralDerivative(object):
         self._test(dim=3, dtype=npw.float32, polynomial=True, **kwds)
 
     def perform_tests(self):
-        max_2d_runs = None if __ENABLE_LONG_TESTS__ else 2
-        max_3d_runs = None if __ENABLE_LONG_TESTS__ else 2
-
         self.test_1d_trigonometric_float32(max_derivative=3)
-        # self.test_2d_trigonometric_float32(max_derivative=2, max_runs=max_2d_runs)
-        self.test_3d_trigonometric_float32(max_derivative=1, max_runs=max_3d_runs)
+        self.test_2d_trigonometric_float32(max_derivative=1, max_runs=None)
+        self.test_3d_trigonometric_float32(max_derivative=1, max_runs=5)
 
         if __ENABLE_LONG_TESTS__:
             # self.test_1d_trigonometric_float64(max_derivative=3)
diff --git a/hysop/tools/hptt_utils.py b/hysop/tools/hptt_utils.py
new file mode 100644
index 000000000..ec7af8190
--- /dev/null
+++ b/hysop/tools/hptt_utils.py
@@ -0,0 +1,38 @@
+import numpy as np
+
+try:
+    import hptt
+    HAS_HPTT=True
+    # required version is: https://gitlab.com/keckj/hptt
+except ImportError:
+    hptt = None
+    HAS_HPTT=False
+    import warnings
+    from hysop.tools.warning import HysopPerformanceWarning
+    msg='Failed to import HPTT module, falling back to slow numpy transpose. Required version is available at https://gitlab.com/keckj/hptt.'
+    warnings.warn(msg, HysopPerformanceWarning)
+
+def array_share_data(a,b):
+    abeg, aend = np.byte_bounds(a)
+    bbeg, bend = np.byte_bounds(b)
+    beg, end = max(abeg, bbeg), min(aend, bend)
+    return (end-beg > 0)
+
+if HAS_HPTT:
+    def can_exec_hptt(src, dst):
+        if (src is dst):
+            return False
+        if (src.dtype != dst.dtype):
+            return False
+        if src.dtype not in (np.float32, np.float64, np.complex64, np.complex128):
+            return False
+        if src.flags['C_CONTIGUOUS'] != dst.flags['C_CONTIGUOUS']:
+            return False
+        if src.flags['F_CONTIGUOUS'] != dst.flags['F_CONTIGUOUS']:
+            return False
+        if not (src.flags['C_CONTIGUOUS'] ^ src.flags['F_CONTIGUOUS']):
+            return False
+        return not array_share_data(src, dst)
+else:
+    def can_exec_hptt(src, dst):
+        return False
-- 
GitLab