From 4d4432eff20984a33280fa0d9ebdf7c4c770a2cb Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Keck <Jean-Baptiste.Keck@imag.fr> Date: Sun, 1 Mar 2020 20:33:07 +0100 Subject: [PATCH] fix spectral poisson curl FFTW, disabled numba in host fft planner, reenabled more tests for spectral solvers, moved hptt methods in tools --- .../host/fortran/operator/fortran_fftw.py | 14 +++---- hysop/fields/tests/test_cartesian.py | 6 +-- hysop/fields/tests/test_cartesian.sh | 2 +- hysop/numerics/fft/host_fft.py | 41 ++++--------------- hysop/operator/tests/test_diffusion.py | 6 +-- .../tests/test_spectral_derivative.py | 17 ++++---- hysop/tools/hptt_utils.py | 38 +++++++++++++++++ 7 files changed, 68 insertions(+), 56 deletions(-) create mode 100644 hysop/tools/hptt_utils.py diff --git a/hysop/backend/host/fortran/operator/fortran_fftw.py b/hysop/backend/host/fortran/operator/fortran_fftw.py index e0efc911a..f71182e3c 100644 --- a/hysop/backend/host/fortran/operator/fortran_fftw.py +++ b/hysop/backend/host/fortran/operator/fortran_fftw.py @@ -45,10 +45,9 @@ class FortranFFTWOperator(FortranOperator): values=CartesianTopologyDescriptors) nb_components =input_fields.keys()[0].nb_components - for f in set(input_fields.keys()+output_fields.keys()): - assert f.nb_components == nb_components, \ - "Number of components mismatch." - for fi in f.fields: + tensor_fields = set(input_fields.keys()+output_fields.keys()) + for tf in tensor_fields: + for fi in tf.fields: if (fi.dtype != HYSOP_REAL): msg='FortranFFTW operators only work with HYSOP_REAL precision specified during hysop ' msg+='build.' @@ -63,10 +62,9 @@ class FortranFFTWOperator(FortranOperator): msg+='\n lboundaries: {}'.format(fi.lboundaries) msg+='\n rboundaries: {}'.format(fi.rboundaries) raise RuntimeError(msg) - - self._scalar_3d = False - if nb_components == 1: - self._scalar_3d = True + + # Special case: 3D diffusion of a scalar + self._scalar_3d = (nb_components == 1) and all(tf.nb_components==1 for tf in tensor_fields) domain = self.input_fields.keys()[0].domain self.dim = domain.dim diff --git a/hysop/fields/tests/test_cartesian.py b/hysop/fields/tests/test_cartesian.py index 7b178728b..43a947fd7 100644 --- a/hysop/fields/tests/test_cartesian.py +++ b/hysop/fields/tests/test_cartesian.py @@ -497,7 +497,7 @@ def test_mpi_ghost_exchange_periodic(comm=None): print msg print '*'*len(msg) print 'test_mpi_ghost_exchange_periodic()'.format(size) - for dim in xrange(1,2+__ENABLE_LONG_TESTS__): + for dim in xrange(1,3+__ENABLE_LONG_TESTS__): if rank==0: print(' >DIM={}'.format(dim)) @@ -610,7 +610,7 @@ def test_mpi_ghost_exchange_runtime(comm=None): print '*'*len(msg) print 'test_mpi_ghost_exchange_runtime()'.format(size) - for dim in xrange(1,2+__ENABLE_LONG_TESTS__): + for dim in xrange(1,3+__ENABLE_LONG_TESTS__): if rank==0: sys.stdout.write('>DIM={}\n'.format(dim)) @@ -694,7 +694,7 @@ def test_mpi_ghost_accumulate_periodic(comm=None): np.int16, np.int32, np.int64, np.uint16, np.uint32, np.uint64) assert size-1 < len(dtypes) - for dim in xrange(1,2+__ENABLE_LONG_TESTS__): + for dim in xrange(1,3+__ENABLE_LONG_TESTS__): if rank==0: print(' >DIM={}'.format(dim)) diff --git a/hysop/fields/tests/test_cartesian.sh b/hysop/fields/tests/test_cartesian.sh index 1b7e6ada8..c739ed878 100755 --- a/hysop/fields/tests/test_cartesian.sh +++ b/hysop/fields/tests/test_cartesian.sh @@ -21,5 +21,5 @@ popd > /dev/null TEST_FILE=$SCRIPT_PATH/test_cartesian.py for i in 2; do - mpirun -np $i python2 $TEST_FILE + mpirun -np $i python2.7 $TEST_FILE done diff --git a/hysop/numerics/fft/host_fft.py b/hysop/numerics/fft/host_fft.py index 3a96f03d9..ceb5c70e6 100644 --- a/hysop/numerics/fft/host_fft.py +++ b/hysop/numerics/fft/host_fft.py @@ -11,39 +11,18 @@ import numpy as np import numba as nb from hysop import __FFTW_NUM_THREADS__, __FFTW_PLANNER_EFFORT__, __FFTW_PLANNER_TIMELIMIT__, \ - __DEFAULT_NUMBA_TARGET__ + __DEFAULT_NUMBA_TARGET__ from hysop.tools.types import first_not_None, check_instance from hysop.tools.numba_utils import HAS_NUMBA, bake_numba_copy, bake_numba_accumulate, bake_numba_transpose +from hysop.tools.hptt_utils import HAS_HPTT, hptt, can_exec_hptt, array_share_data from hysop.tools.decorators import static_vars from hysop.backend.host.host_array_backend import HostArrayBackend from hysop.backend.host.host_array import HostArray from hysop.numerics.fft.fft import FFTQueueI, FFTPlanI, FFTI -def can_exec_hptt(src, dst): - if (src is dst): - return False - if (src.dtype != dst.dtype): - return False - if src.dtype not in (np.float32, np.float64, np.complex64, np.complex128): - return False - if src.flags['C_CONTIGUOUS'] != dst.flags['C_CONTIGUOUS']: - return False - if src.flags['F_CONTIGUOUS'] != dst.flags['F_CONTIGUOUS']: - return False - if not (src.flags['C_CONTIGUOUS'] ^ src.flags['F_CONTIGUOUS']): - return False - return (src.data is not dst.data) - -try: - import hptt - HAS_HPTT=True - # required version is: https://gitlab.com/keckj/hptt -except ImportError: - HAS_HPTT=False - import warnings - from hysop.tools.warning import HysopPerformanceWarning - msg='Failed to import HPTT module, falling back to slow numpy transpose. Required version is available at https://gitlab.com/keckj/hptt.' - warnings.warn(msg, HysopPerformanceWarning) +# Currently there is a bug for non contiguous arrays +# in numba so we disable it +HAS_NUMBA=False class DummyEvent(object): @classmethod @@ -137,9 +116,7 @@ class HostFFTI(FFTI): @static_vars(numba_copy=None) def exec_copy(src=src, dst=dst): src, dst = src(), dst() - if (src.ndim == 1): - dst[...] = src - elif HAS_HPTT and can_exec_hptt(src, dst): + if can_exec_hptt(src, dst): hptt.tensorTransposeAndUpdate(perm=range(src.ndim), alpha=1.0, A=src, beta=0.0, B=dst) elif HAS_NUMBA: @@ -157,7 +134,7 @@ class HostFFTI(FFTI): @static_vars(numba_accumulate=None) def exec_accumulate(src=src, dst=dst): src, dst = src(), dst() - if HAS_HPTT and can_exec_hptt(src, dst): + if can_exec_hptt(src, dst): hptt.tensorTransposeAndUpdate(perm=range(src.ndim), alpha=1.0, A=src, beta=1.0, B=dst) elif HAS_NUMBA: @@ -166,7 +143,7 @@ class HostFFTI(FFTI): exec_accumulate.numba_accumulate() else: dst[...] += src - return exec_copy + return exec_accumulate def plan_transpose(self, tg, src, dst, axes): src = self.ensure_callable(src) @@ -175,7 +152,7 @@ class HostFFTI(FFTI): @static_vars(numba_transpose=None) def exec_transpose(src=src, dst=dst, axes=axes): src, dst = src(), dst() - if HAS_HPTT and can_exec_hptt(src, dst): + if can_exec_hptt(src, dst): hptt.tensorTransposeAndUpdate(perm=axes, alpha=1.0, A=src, beta=0.0, B=dst) elif HAS_NUMBA: diff --git a/hysop/operator/tests/test_diffusion.py b/hysop/operator/tests/test_diffusion.py index d326ddbb3..773801b8b 100644 --- a/hysop/operator/tests/test_diffusion.py +++ b/hysop/operator/tests/test_diffusion.py @@ -267,14 +267,12 @@ class TestDiffusionOperator(object): def test_diffusion_2D_inplace(self): self._test(dim=2, is_inplace=True) def test_diffusion_3D_inplace(self): - if __ENABLE_LONG_TESTS__: - self._test(dim=3, is_inplace=True) + self._test(dim=3, is_inplace=True) def perform_tests(self): self.test_diffusion_1D_inplace() self.test_diffusion_2D_inplace() - if __ENABLE_LONG_TESTS__: - self.test_diffusion_3D_inplace() + self.test_diffusion_3D_inplace() print diff --git a/hysop/operator/tests/test_spectral_derivative.py b/hysop/operator/tests/test_spectral_derivative.py index 2bbe4e45c..f69e7df2a 100644 --- a/hysop/operator/tests/test_spectral_derivative.py +++ b/hysop/operator/tests/test_spectral_derivative.py @@ -117,7 +117,8 @@ class TestSpectralDerivative(object): domain_boundaries = list(domain_boundary_iterator(dim=dim)) periodic = domain_boundaries[0] domain_boundaries = domain_boundaries[1:] - random.shuffle(domain_boundaries) + if (max_runs is not None): + random.shuffle(domain_boundaries) domain_boundaries.insert(0, periodic) for i, (lboundaries, rboundaries) in enumerate(domain_boundaries, 1): @@ -213,7 +214,6 @@ class TestSpectralDerivative(object): op = SpectralSpaceDerivative(cl_env=cl_env, **base_kwds) yield op.to_graph() print - print else: msg='Unknown implementation to test {}.'.format(impl) raise NotImplementedError(msg) @@ -270,7 +270,11 @@ class TestSpectralDerivative(object): eps = npw.finfo(fout.dtype).eps dist = npw.abs(fout-fref) dinf = npw.max(dist) - deps = int(dinf/eps) + try: + deps = int(dinf/eps) + except OverflowError: + import numpy as np + deps = np.inf if (deps <= 10**(nidx+2)): if (j==1): print '{}eps ({})'.format(deps, dinf), @@ -341,12 +345,9 @@ class TestSpectralDerivative(object): self._test(dim=3, dtype=npw.float32, polynomial=True, **kwds) def perform_tests(self): - max_2d_runs = None if __ENABLE_LONG_TESTS__ else 2 - max_3d_runs = None if __ENABLE_LONG_TESTS__ else 2 - self.test_1d_trigonometric_float32(max_derivative=3) - # self.test_2d_trigonometric_float32(max_derivative=2, max_runs=max_2d_runs) - self.test_3d_trigonometric_float32(max_derivative=1, max_runs=max_3d_runs) + self.test_2d_trigonometric_float32(max_derivative=1, max_runs=None) + self.test_3d_trigonometric_float32(max_derivative=1, max_runs=5) if __ENABLE_LONG_TESTS__: # self.test_1d_trigonometric_float64(max_derivative=3) diff --git a/hysop/tools/hptt_utils.py b/hysop/tools/hptt_utils.py new file mode 100644 index 000000000..ec7af8190 --- /dev/null +++ b/hysop/tools/hptt_utils.py @@ -0,0 +1,38 @@ +import numpy as np + +try: + import hptt + HAS_HPTT=True + # required version is: https://gitlab.com/keckj/hptt +except ImportError: + hptt = None + HAS_HPTT=False + import warnings + from hysop.tools.warning import HysopPerformanceWarning + msg='Failed to import HPTT module, falling back to slow numpy transpose. Required version is available at https://gitlab.com/keckj/hptt.' + warnings.warn(msg, HysopPerformanceWarning) + +def array_share_data(a,b): + abeg, aend = np.byte_bounds(a) + bbeg, bend = np.byte_bounds(b) + beg, end = max(abeg, bbeg), min(aend, bend) + return (end-beg > 0) + +if HAS_HPTT: + def can_exec_hptt(src, dst): + if (src is dst): + return False + if (src.dtype != dst.dtype): + return False + if src.dtype not in (np.float32, np.float64, np.complex64, np.complex128): + return False + if src.flags['C_CONTIGUOUS'] != dst.flags['C_CONTIGUOUS']: + return False + if src.flags['F_CONTIGUOUS'] != dst.flags['F_CONTIGUOUS']: + return False + if not (src.flags['C_CONTIGUOUS'] ^ src.flags['F_CONTIGUOUS']): + return False + return not array_share_data(src, dst) +else: + def can_exec_hptt(src, dst): + return False -- GitLab