From 23f33146defbfa557cec3ac99a5b068f17d03d9a Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Keck <Jean-Baptiste.Keck@imag.fr> Date: Thu, 29 Nov 2018 12:04:24 +0100 Subject: [PATCH] better example interface including threads and fft parameters --- examples/example_utils.py | 98 ++++++++++++++++++++++++++++++++-- hysop/__init__.py.in | 8 ++- hysop/numerics/fft/fftw_fft.py | 8 ++- hysop/numerics/fft/host_fft.py | 10 ++-- 4 files changed, 113 insertions(+), 11 deletions(-) diff --git a/examples/example_utils.py b/examples/example_utils.py index 30ee3c338..11f7742a8 100644 --- a/examples/example_utils.py +++ b/examples/example_utils.py @@ -1,4 +1,4 @@ -import os, argparse, tempfile, colors, textwrap, warnings, contextlib, tee, re, errno, shutil +import os, argparse, tempfile, colors, textwrap, warnings, contextlib, tee, re, errno, shutil, psutil from argparse_color_formatter import ColorHelpFormatter # Fix a bug in the tee module ######### @@ -83,8 +83,9 @@ class HysopArgParser(argparse.ArgumentParser): return (cls.get_fs_type(path) in ('nfs',)) @classmethod - def set_env(cls, target, value): - target = 'HYSOP_{}'.format(target) + def set_env(cls, target, value, hysop=True): + if hysop: + target = 'HYSOP_{}'.format(target) if (value is None): pass elif (value is True): @@ -135,6 +136,7 @@ class HysopArgParser(argparse.ArgumentParser): self._add_domain_args() self._add_simu_args() self._add_method_args() + self._add_threading_args() self._add_opencl_args() self._add_autotuner_args() self._add_graphical_io_args() @@ -145,6 +147,9 @@ class HysopArgParser(argparse.ArgumentParser): def parse(self): args = self.parse_args() args.__class__ = HysopNamespace + + self._check_fft_args(args) + self._check_threading_args(args) self._setup_hysop_env(args) @@ -289,7 +294,7 @@ class HysopArgParser(argparse.ArgumentParser): args = self.add_argument_group('Main parameters') args.add_argument('-impl', '--implementation', type=str, default='python', dest='impl', - help='Backend implementation (either python or opencl).') + help='Backend implementation (either python, fortran or opencl).') args.add_argument('-cp', '--compute-precision', type=str, default='fp32', dest='compute_precision', help='Floating-point precision used to discretize the parameters and fields.') @@ -472,6 +477,38 @@ class HysopArgParser(argparse.ArgumentParser): args.interpolation = self._convert_interpolation('interpolation', args.interpolation) + def _add_threading_args(self): + threading = self.add_argument_group('threading parameters') + msg = "Enable threads for backends that supports it (Numba and FFTW) by setting HYSOP_ENABLE_THREADS. " + msg += "Disabling threading will limit all threading backends to one thread and set numba default backend to 'cpu' instead of 'parallel'." + threading.add_argument('--enable-threading', type=str, default=True, + dest='enable_threading', + help=msg) + msg="If this parameter is set to 'physical', the maximum number of threads will be set to the number of physical cores available to the process (taking into account the cpuset). " + msg+="If set to 'logical', logical cores will be chosen instead. Else this parameter expects a positive integer." + msg+='Set the default maximum usable threads for threading backends (OpenMP, MKL) and operator backends using threads (Numba, FFTW). ' + msg+='This parameter will set HYSOP_MAX_THREADS and does not affect the OpenCL backend.' + msg+='If --enable-threads is set to False, this parameter is ignored and HYSOP_MAX_THREADS will be set to 1.' + threading.add_argument('--max-threads', type=str, default='physical', + dest='max_threads', + help=msg) + threading.add_argument('--openmp-threads', type=str, default=None, + dest='openmp_threads', + help='This parameter will set OMP_NUM_THREADS to a custom value (overrides --max-threads).') + threading.add_argument('--mkl-threads', type=str, default=None, + dest='mkl_threads', + help='This parameter will set MKL_NUM_THREADS to a custom value (overrides --max-threads).') + threading.add_argument('--fftw-threads', type=str, default=None, + dest='fftw_threads', + help='This parameter will set FFTW_NUM_THREADS to a custom value (overrides --max-threads).') + threading.add_argument('--numba-threads', type=str, default=None, + dest='numba_threads', + help='This parameter will set NUMBA_NUM_THREADS to a custom value (overrides --max-threads).') + threading.add_argument('--numba-threading-layer', type=str, default='workqueue', + dest='numba_threading_layer', + help="This parameter will set NUMBA_THREADING_LAYER to a custom value ('workqueue' is available on all platforms, but not 'omp' and 'tbb'). Use 'numba -s' to list available numba threading layers.") + return threading + def _add_opencl_args(self): opencl = self.add_argument_group('OpenCL parameters') @@ -509,6 +546,20 @@ class HysopArgParser(argparse.ArgumentParser): dest='cl_enable_loop_unrolling', help='Enable loop unrolling for code-generated OpenCL kernels.') return opencl + + def _check_threading_args(self, args): + self._check_default(args, ('enable_threading', 'max_threads', 'numba_threading_layer'), str, allow_none=False) + self._check_default(args, ('openmp_threads', 'mkl_threads', 'numba_threads'), str, allow_none=True) + + args.enable_threading = self._convert_bool('enable_threading', args.enable_threading) + if args.enable_threading: + args.max_threads = self._convert_threads('max_threads', args.max_threads, default=None) + else: + args.max_threads = 1 + for argname in ('openmp_threads', 'mkl_threads', 'numba_threads', 'fftw_threads'): + setattr(args, argname, self._convert_threads(argname, getattr(args, argname), default=args.max_threads)) + + args.numba_threading_layer = self._convert_numba_threading_layer('numba_threading_layer', args.numba_threading_layer) def _check_opencl_args(self, args): self._check_default(args, ('cl_platform_id', 'cl_device_id'), int, allow_none=True) @@ -923,6 +974,18 @@ class HysopArgParser(argparse.ArgumentParser): msg='Uknown tracing module \'{}\'.'.format(module) self.error(msg) + set_env('ENABLE_THREADING', args.enable_threading) + set_env('MAX_THREADS', args.max_threads) + set_env('OMP_NUM_THREADS', args.openmp_threads, False) + set_env('MKL_NUM_THREADS', args.mkl_threads, False) + set_env('NUMBA_NUM_THREADS', args.numba_threads, False) + set_env('NUMBA_THREADING_LAYER', args.numba_threading_layer, False + set_env('FFTW_NUM_THREADS', args.fftw_threads, False) + if (args.fftw_planner_effort is not None): + set_env('FFTW_PLANNER_EFFORT', args.fftw_planner_effort, False) + if (args.fftw_planner_timelimite is not None): + set_env('FFTW_PLANNER_TIMELIMIT', args.fftw_planner_timelimite, False) + def _setup_parameters(self, args): pass @@ -972,6 +1035,33 @@ class HysopArgParser(argparse.ArgumentParser): '1': True, } return self._check_convert(argname, val, values) + + def _convert_threads(self, argname, val, default): + if (val == 'physical'): + val = psutil.cpu_count(logical=False) + elif (val == 'logical'): + val = psutil.cpu_count(logical=True) + elif (val is None): + if (default is None): + msg = "'Parameter '{}' has been set to None and no default value has been set." + msg = msg.format(argname) + self.error(msg) + else: + val = default + val = int(val) + if not (val > 0): + msg = "'Parameter '{}' has been set to an invalid number of threads {}." + msg = msg.format(argname, val) + self.error(msg) + return val + + def _convert_numba_threading_layer(self, argname, val): + values = { + 'workqueue': 'workqueue', + 'omp': 'omp', + 'tbb': 'tbb' + } + return self._check_convert(argname, val, values) def _convert_implementation(self, argname, impl): from hysop.constants import Implementation diff --git a/hysop/__init__.py.in b/hysop/__init__.py.in index 9e79793ef..61931fee2 100644 --- a/hysop/__init__.py.in +++ b/hysop/__init__.py.in @@ -50,14 +50,18 @@ __ENABLE_LONG_TESTS__ = get_env('ENABLE_LONG_TESTS', ("@ENABLE_LONG_TESTS@" is " # Threads __ENABLE_THREADING__ = get_env('ENABLE_THREADING', True) -__MAX_THREADS__ = int(get_env('MAX_THREADS', psutil.cpu_count(logical=True)) - if __ENABLE_THREADING__ else 1) +__MAX_THREADS__ = int(get_env('MAX_THREADS', psutil.cpu_count(logical=False)) if __ENABLE_THREADING__ else 1) set_env('OMP_NUM_THREADS', __MAX_THREADS__) set_env('MKL_NUM_THREADS', __MAX_THREADS__) set_env('NUMBA_NUM_THREADS', __MAX_THREADS__) set_env('NUMBA_THREADING_LAYER', 'workqueue') # Use 'numba -s' to list support __DEFAULT_NUMBA_TARGET__ = ('parallel' if __ENABLE_THREADING__ else 'cpu') +# FFTW +__FFTW_NUM_THREADS__ = get_env('FFTW_NUM_THREADS', __MAX_THREADS__) +__FFTW_PLANNER_EFFORT__ = get_env('FFTW_PLANNER_EFFORT', 'FFTW_ESTIMATE') +__FFTW_PLANNER_TIMELIMIT__ = get_env('FFTW_PLANNER_TIMELIMIT', None) + # OpenCL __DEFAULT_PLATFORM_ID__ = int(get_env('DEFAULT_PLATFORM_ID', @OPENCL_DEFAULT_OPENCL_PLATFORM_ID@)) __DEFAULT_DEVICE_ID__ = int(get_env('DEFAULT_DEVICE_ID', @OPENCL_DEFAULT_OPENCL_DEVICE_ID@)) diff --git a/hysop/numerics/fft/fftw_fft.py b/hysop/numerics/fft/fftw_fft.py index 1557db97f..855a2e7cb 100644 --- a/hysop/numerics/fft/fftw_fft.py +++ b/hysop/numerics/fft/fftw_fft.py @@ -8,6 +8,7 @@ import warnings import pyfftw import numpy as np +from hysop import __FFTW_NUM_THREADS__, __FFTW_PLANNER_EFFORT__, __FFTW_PLANNER_TIMELIMIT__ from hysop.tools.types import first_not_None from hysop.tools.misc import prod from hysop.numerics.fft.fft import HysopFFTWarning, bytes2str @@ -102,14 +103,17 @@ class FftwFFT(HostFFTI): Planning destroys initial arrays content. """ - def __init__(self, threads=1, - planner_effort='FFTW_MEASURE', + def __init__(self, threads=None, + planner_effort=None, planning_timelimit=None, destroy_input=False, warn_on_allocation=True, warn_on_misalignment=True, backend=None, allocator=None, **kwds): + threads = first_not_None(threads, __FFTW_NUM_THREADS__) + planner_effort = first_not_None(planner_effort, __FFTW_PLANNER_EFFORT__) + planning_timelimit = first_not_None(planner_timelimit, __FFTW_PLANNER_TIMELIMIT__) super(FftwFFT, self).__init__(backend=backend, allocator=allocator, warn_on_allocation=warn_on_allocation, **kwds) self.supported_ftypes = (np.float32, np.float64, np.longdouble) diff --git a/hysop/numerics/fft/host_fft.py b/hysop/numerics/fft/host_fft.py index 69222d79b..c675ec325 100644 --- a/hysop/numerics/fft/host_fft.py +++ b/hysop/numerics/fft/host_fft.py @@ -9,7 +9,7 @@ OpenCl backend base interface for fast Fourier Transforms. import psutil import numpy as np -from hysop import __MAX_THREADS__ +from hysop import __FFTW_NUM_THREADS__, __FFTW_PLANNER_EFFORT__, __FFTW_PLANNER_TIMELIMIT__ from hysop.tools.types import first_not_None, check_instance from hysop.backend.host.host_array_backend import HostArrayBackend from hysop.backend.host.host_array import HostArray @@ -67,10 +67,14 @@ class HostFFTI(FFTI): Get the default host FFT interface which is a multithreaded FFTW interface with ESTIMATE planning effort. """ - threads = first_not_None(threads, __MAX_THREADS__) + threads = first_not_None(threads, __FFTW_NUM_THREADS__) + planner_effort = first_not_None(planner_effort, __FFTW_PLANNER_EFFORT__) + planning_timelimit = first_not_None(planner_timelimit, __FFTW_PLANNER_TIMELIMIT__) planner_effort = first_not_None(planner_effort, 'FFTW_ESTIMATE') from hysop.numerics.fft.fftw_fft import FftwFFT - return FftwFFT(threads=threads, + return FftwFFT(threads=threads, + planner_effort=planner_effort, + planning_timelimit=planning_timelimit, backend=backend, allocator=allocator, planner_effort=planner_effort, planning_timelimit=planning_timelimit, -- GitLab