diff --git a/examples/example_utils.py b/examples/example_utils.py index b11eaa2b1883d50650dca28a4d8bc82ae9cc7319..909ebdbcb657c6cc5f72c6a258a79c583647572a 100644 --- a/examples/example_utils.py +++ b/examples/example_utils.py @@ -539,7 +539,7 @@ class HysopArgParser(argparse.ArgumentParser): threading.add_argument('--fftw-threads', type=str, default=None, dest='fftw_threads', help='This parameter will set HYSOP_FFTW_NUM_THREADS to a custom value (overrides --max-threads).') - threading.add_argument('--fftw-planner-effort', type=str, default='FFTW_ESTIMATE', + threading.add_argument('--fftw-planner-effort', type=str, default='estimate', dest='fftw_planner_effort', help='Set default planning effort for FFTW plans. The actual number of threads used by FFTW may depend on the planning step. This parameter will set HYSOP_FFTW_PLANNER_EFFORT.') threading.add_argument('--fftw-planner-timelimit', type=str, default='-1', @@ -1100,14 +1100,10 @@ class HysopArgParser(argparse.ArgumentParser): def _convert_fftw_planner_effort(self, argname, val): values = { - 'FFTW_ESTIMATE': 'FFTW_ESTIMATE', - 'FFTW_MEASURE': 'FFTW_MEASURE', - 'FFTW_PATIENT': 'FFTW_PATIENT', - 'FFTW_EXHAUSTIVE': 'FFTW_EXHAUSTIVE', - 'fftw_estimate': 'FFTW_ESTIMATE', - 'fftw_measure': 'FFTW_MEASURE', - 'fftw_patient': 'FFTW_PATIENT', - 'fftw_exhaustive': 'FFTW_EXHAUSTIVE', + 'estimate': 'FFTW_ESTIMATE', + 'measure': 'FFTW_MEASURE', + 'patient': 'FFTW_PATIENT', + 'exhaustive': 'FFTW_EXHAUSTIVE', } return self._check_convert(argname, val, values) diff --git a/hysop/__init__.py.in b/hysop/__init__.py.in index 0ac87c567fbbd66ee5de98fd45e044128160f0e7..4f91a6fed877ee987d4638caba2e432504ce3497 100644 --- a/hysop/__init__.py.in +++ b/hysop/__init__.py.in @@ -58,8 +58,8 @@ set_env('NUMBA_THREADING_LAYER', 'workqueue') # Use 'numba -s' to list support __DEFAULT_NUMBA_TARGET__ = ('parallel' if __ENABLE_THREADING__ else 'cpu') # FFTW -__FFTW_NUM_THREADS__ = get_env('FFTW_NUM_THREADS', __MAX_THREADS__) -__FFTW_PLANNER_EFFORT__ = get_env('FFTW_PLANNER_EFFORT', 'FFTW_ESTIMATE') +__FFTW_NUM_THREADS__ = int(get_env('FFTW_NUM_THREADS', __MAX_THREADS__)) +__FFTW_PLANNER_EFFORT__ = get_env('FFTW_PLANNER_EFFORT', 'FFTW_ESTIMATE') __FFTW_PLANNER_TIMELIMIT__ = int(get_env('FFTW_PLANNER_TIMELIMIT', -1)) # OpenCL diff --git a/hysop/numerics/fft/fftw_fft.py b/hysop/numerics/fft/fftw_fft.py index 72bcda107656df68d19b470a25cae5e1630466de..bd86eae3bd1f6fba9a59fe1d9e709daafefc611d 100644 --- a/hysop/numerics/fft/fftw_fft.py +++ b/hysop/numerics/fft/fftw_fft.py @@ -8,9 +8,13 @@ import warnings import pyfftw import numpy as np -from hysop import __FFTW_NUM_THREADS__, __FFTW_PLANNER_EFFORT__, __FFTW_PLANNER_TIMELIMIT__ +from hysop import __FFTW_NUM_THREADS__, __FFTW_PLANNER_EFFORT__, __FFTW_PLANNER_TIMELIMIT__, \ + __VERBOSE__ +from hysop.tools.io_utils import IO from hysop.tools.types import first_not_None from hysop.tools.misc import prod +from hysop.tools.string_utils import framed_str +from hysop.tools.cache import load_data_from_cache, update_cache from hysop.numerics.fft.fft import HysopFFTWarning, bytes2str from hysop.numerics.fft.host_fft import HostFFTPlanI, HostFFTI, HostArray @@ -21,6 +25,29 @@ class FftwFFTPlan(HostFFTPlanI): Emit warnings when changing input and output alignment. """ + __FFTW_USE_CACHE__=True + + @classmethod + def cache_file(cls): + _cache_dir = IO.cache_path() + '/numerics' + _cache_file = _cache_dir + '/fftw_wisdom.pklz' + return _cache_file + + @classmethod + def load_wisdom(cls, h): + if cls.__FFTW_USE_CACHE__: + wisdom = load_data_from_cache(cls.cache_file(), h) + if (wisdom is not None): + pyfftw.import_wisdom(wisdom) + return True + return False + + @classmethod + def save_wisdom(cls, h, plan): + if cls.__FFTW_USE_CACHE__: + wisdom = pyfftw.export_wisdom() + update_cache(cls.cache_file(), h, wisdom) + def __init__(self, a, out, scaling=None, **plan_kwds): super(FftwFFTPlan, self).__init__() @@ -33,12 +60,62 @@ class FftwFFTPlan(HostFFTPlanI): plan_kwds['output_array'] = out.handle else: plan_kwds['output_array'] = out + + def fmt_arg(name): + return plan_kwds[name] + def fmt_array(name): + arr = fmt_arg(name) + return 'shape={:<16} strides={:<16} dtype={:<16}'.format( + str(arr.shape)+',', + str(arr.strides)+',', + arr.dtype) - plan = pyfftw.FFTW(**plan_kwds) + title=' Planning {} '.format(self.__class__.__name__) + msg = \ + ''' in_array: {} + out_array: {} + axes: {} + direction: {} + threads: {} + flags: {} + planning timelimit: {}'''.format( + fmt_array('input_array'), + fmt_array('output_array'), + fmt_arg('axes'), + fmt_arg('direction'), + fmt_arg('threads'), + ' | '.join(fmt_arg('flags')), + fmt_arg('planning_timelimit')) + if __VERBOSE__: + print + print framed_str(title, msg, c='*') + + def hash_arg(name): + return hash(plan_kwds[name]) + def hash_array(name): + arr = plan_kwds[name] + return hash(arr.shape) ^ hash(arr.strides) + #h = hash_array('input_array') ^ hash_array('output_array') ^ hash_arg('axes') ^ hash_arg('direction') + h = None + + plan = None + may_have_wisdom = self.load_wisdom(h) + if may_have_wisdom: + plan_kwds['flags'] += ('FFTW_WISDOM_ONLY',) + # try to build plan from wisdom only (can fail if wisdom has only measure knowledge for example) + try: + plan = pyfftw.FFTW(**plan_kwds) + except RuntimeError: + pass + if (plan is None): + plan_kwds['flags'] = tuple(set(plan_kwds['flags']) - set(['FFTW_WISDOM_ONLY'])) + plan = pyfftw.FFTW(**plan_kwds) + self.save_wisdom(h, plan) if (not plan.simd_aligned): msg='Resulting plan is not SIMD aligned ({} bytes boundary).' msg=msg.format(pyfftw.simd_alignment) warnings.warn(msg, HysopFFTWarning) + self.plan = plan self.scaling = scaling self.out = out