From 23f33146defbfa557cec3ac99a5b068f17d03d9a Mon Sep 17 00:00:00 2001
From: Jean-Baptiste Keck <Jean-Baptiste.Keck@imag.fr>
Date: Thu, 29 Nov 2018 12:04:24 +0100
Subject: [PATCH] better example interface including threads and fft parameters

---
 examples/example_utils.py      | 98 ++++++++++++++++++++++++++++++++--
 hysop/__init__.py.in           |  8 ++-
 hysop/numerics/fft/fftw_fft.py |  8 ++-
 hysop/numerics/fft/host_fft.py | 10 ++--
 4 files changed, 113 insertions(+), 11 deletions(-)

diff --git a/examples/example_utils.py b/examples/example_utils.py
index 30ee3c338..11f7742a8 100644
--- a/examples/example_utils.py
+++ b/examples/example_utils.py
@@ -1,4 +1,4 @@
-import os, argparse, tempfile, colors, textwrap, warnings, contextlib, tee, re, errno, shutil
+import os, argparse, tempfile, colors, textwrap, warnings, contextlib, tee, re, errno, shutil, psutil
 from argparse_color_formatter import ColorHelpFormatter
 
 # Fix a bug in the tee module #########
@@ -83,8 +83,9 @@ class HysopArgParser(argparse.ArgumentParser):
         return (cls.get_fs_type(path) in ('nfs',))
 
     @classmethod
-    def set_env(cls, target, value):
-        target = 'HYSOP_{}'.format(target)
+    def set_env(cls, target, value, hysop=True):
+        if hysop:
+            target = 'HYSOP_{}'.format(target)
         if (value is None):
             pass
         elif (value is True):
@@ -135,6 +136,7 @@ class HysopArgParser(argparse.ArgumentParser):
         self._add_domain_args()
         self._add_simu_args()
         self._add_method_args()
+        self._add_threading_args()
         self._add_opencl_args()
         self._add_autotuner_args()
         self._add_graphical_io_args()
@@ -145,6 +147,9 @@ class HysopArgParser(argparse.ArgumentParser):
     def parse(self): 
         args = self.parse_args()
         args.__class__ = HysopNamespace
+        
+        self._check_fft_args(args)
+        self._check_threading_args(args)
 
         self._setup_hysop_env(args)
         
@@ -289,7 +294,7 @@ class HysopArgParser(argparse.ArgumentParser):
         args = self.add_argument_group('Main parameters')
         args.add_argument('-impl', '--implementation', type=str, default='python',
                 dest='impl',
-                help='Backend implementation (either python or opencl).')
+                help='Backend implementation (either python, fortran or opencl).')
         args.add_argument('-cp', '--compute-precision', type=str, default='fp32',
                 dest='compute_precision', 
                 help='Floating-point precision used to discretize the parameters and fields.')
@@ -472,6 +477,38 @@ class HysopArgParser(argparse.ArgumentParser):
         args.interpolation = self._convert_interpolation('interpolation', 
                 args.interpolation)
         
+    def _add_threading_args(self):
+        threading = self.add_argument_group('threading parameters')
+        msg =  "Enable threads for backends that supports it (Numba and FFTW) by setting HYSOP_ENABLE_THREADS. "
+        msg += "Disabling threading will limit all threading backends to one thread and set numba default backend to 'cpu' instead of 'parallel'."
+        threading.add_argument('--enable-threading', type=str, default=True, 
+                dest='enable_threading',
+                help=msg)
+        msg="If this parameter is set to 'physical', the maximum number of threads will be set to the number of physical cores available to the process (taking into account the cpuset). "
+        msg+="If set to 'logical', logical cores will be chosen instead. Else this parameter expects a positive integer."
+        msg+='Set the default maximum usable threads for threading backends (OpenMP, MKL) and operator backends using threads (Numba, FFTW). '
+        msg+='This parameter will set HYSOP_MAX_THREADS and does not affect the OpenCL backend.'
+        msg+='If --enable-threads is set to False, this parameter is ignored and HYSOP_MAX_THREADS will be set to 1.'
+        threading.add_argument('--max-threads', type=str, default='physical', 
+                dest='max_threads',
+                help=msg)
+        threading.add_argument('--openmp-threads', type=str, default=None, 
+                dest='openmp_threads',
+                help='This parameter will set OMP_NUM_THREADS to a custom value (overrides --max-threads).')
+        threading.add_argument('--mkl-threads', type=str, default=None, 
+                dest='mkl_threads',
+                help='This parameter will set MKL_NUM_THREADS to a custom value (overrides --max-threads).')
+        threading.add_argument('--fftw-threads', type=str, default=None, 
+                dest='fftw_threads',
+                help='This parameter will set FFTW_NUM_THREADS to a custom value (overrides --max-threads).')
+        threading.add_argument('--numba-threads', type=str, default=None, 
+                dest='numba_threads',
+                help='This parameter will set NUMBA_NUM_THREADS to a custom value (overrides --max-threads).')
+        threading.add_argument('--numba-threading-layer', type=str, default='workqueue', 
+                dest='numba_threading_layer',
+                help="This parameter will set NUMBA_THREADING_LAYER to a custom value ('workqueue' is available on all platforms, but not 'omp' and 'tbb'). Use 'numba -s' to list available numba threading layers.")
+        return threading
+
 
     def _add_opencl_args(self):
         opencl = self.add_argument_group('OpenCL parameters')
@@ -509,6 +546,20 @@ class HysopArgParser(argparse.ArgumentParser):
                 dest='cl_enable_loop_unrolling',
                 help='Enable loop unrolling for code-generated OpenCL kernels.')
         return opencl
+    
+    def _check_threading_args(self, args):
+        self._check_default(args, ('enable_threading', 'max_threads', 'numba_threading_layer'), str, allow_none=False)
+        self._check_default(args, ('openmp_threads', 'mkl_threads', 'numba_threads'), str, allow_none=True)
+
+        args.enable_threading = self._convert_bool('enable_threading', args.enable_threading)
+        if args.enable_threading:
+            args.max_threads = self._convert_threads('max_threads', args.max_threads, default=None)
+        else:
+            args.max_threads = 1
+        for argname in ('openmp_threads', 'mkl_threads', 'numba_threads', 'fftw_threads'):
+            setattr(args, argname, self._convert_threads(argname, getattr(args, argname), default=args.max_threads))
+
+        args.numba_threading_layer = self._convert_numba_threading_layer('numba_threading_layer', args.numba_threading_layer)
 
     def _check_opencl_args(self, args):
         self._check_default(args, ('cl_platform_id', 'cl_device_id'), int, allow_none=True)
@@ -923,6 +974,18 @@ class HysopArgParser(argparse.ArgumentParser):
                     msg='Uknown tracing module \'{}\'.'.format(module)
                     self.error(msg)
 
+        set_env('ENABLE_THREADING',      args.enable_threading)
+        set_env('MAX_THREADS',           args.max_threads)
+        set_env('OMP_NUM_THREADS',       args.openmp_threads,        False)
+        set_env('MKL_NUM_THREADS',       args.mkl_threads,           False)
+        set_env('NUMBA_NUM_THREADS',     args.numba_threads,         False)
+        set_env('NUMBA_THREADING_LAYER', args.numba_threading_layer, False
+        set_env('FFTW_NUM_THREADS',      args.fftw_threads,          False)
+        if (args.fftw_planner_effort is not None):
+            set_env('FFTW_PLANNER_EFFORT', args.fftw_planner_effort, False)
+        if (args.fftw_planner_timelimite is not None):
+            set_env('FFTW_PLANNER_TIMELIMIT', args.fftw_planner_timelimite, False)
+
     def _setup_parameters(self, args):
         pass
 
@@ -972,6 +1035,33 @@ class HysopArgParser(argparse.ArgumentParser):
             '1':     True,
         }
         return self._check_convert(argname, val, values)
+    
+    def _convert_threads(self, argname, val, default):
+        if (val == 'physical'):
+            val = psutil.cpu_count(logical=False)
+        elif (val == 'logical'):
+            val = psutil.cpu_count(logical=True)
+        elif (val is None):
+            if (default is None):
+                msg = "'Parameter '{}' has been set to None and no default value has been set."
+                msg = msg.format(argname)
+                self.error(msg)
+            else:
+                val = default
+        val = int(val)
+        if not (val > 0): 
+           msg = "'Parameter '{}' has been set to an invalid number of threads {}."
+           msg = msg.format(argname, val)
+           self.error(msg) 
+        return val
+    
+    def _convert_numba_threading_layer(self, argname, val):
+        values = {
+            'workqueue': 'workqueue',
+            'omp':       'omp',
+            'tbb':       'tbb'
+        }
+        return self._check_convert(argname, val, values)
 
     def _convert_implementation(self, argname, impl):
         from hysop.constants import Implementation
diff --git a/hysop/__init__.py.in b/hysop/__init__.py.in
index 9e79793ef..61931fee2 100644
--- a/hysop/__init__.py.in
+++ b/hysop/__init__.py.in
@@ -50,14 +50,18 @@ __ENABLE_LONG_TESTS__ = get_env('ENABLE_LONG_TESTS', ("@ENABLE_LONG_TESTS@" is "
 
 # Threads
 __ENABLE_THREADING__ = get_env('ENABLE_THREADING', True)
-__MAX_THREADS__ = int(get_env('MAX_THREADS', psutil.cpu_count(logical=True)) 
-                            if __ENABLE_THREADING__ else 1)
+__MAX_THREADS__ = int(get_env('MAX_THREADS', psutil.cpu_count(logical=False)) if __ENABLE_THREADING__ else 1)
 set_env('OMP_NUM_THREADS',   __MAX_THREADS__)
 set_env('MKL_NUM_THREADS',   __MAX_THREADS__)
 set_env('NUMBA_NUM_THREADS', __MAX_THREADS__)
 set_env('NUMBA_THREADING_LAYER', 'workqueue') # Use 'numba -s' to list support
 __DEFAULT_NUMBA_TARGET__ = ('parallel' if __ENABLE_THREADING__ else 'cpu')
 
+# FFTW
+__FFTW_NUM_THREADS__       = get_env('FFTW_NUM_THREADS',        __MAX_THREADS__)
+__FFTW_PLANNER_EFFORT__    = get_env('FFTW_PLANNER_EFFORT',    'FFTW_ESTIMATE')
+__FFTW_PLANNER_TIMELIMIT__ = get_env('FFTW_PLANNER_TIMELIMIT', None)
+
 # OpenCL
 __DEFAULT_PLATFORM_ID__ = int(get_env('DEFAULT_PLATFORM_ID', @OPENCL_DEFAULT_OPENCL_PLATFORM_ID@))
 __DEFAULT_DEVICE_ID__   = int(get_env('DEFAULT_DEVICE_ID', @OPENCL_DEFAULT_OPENCL_DEVICE_ID@))
diff --git a/hysop/numerics/fft/fftw_fft.py b/hysop/numerics/fft/fftw_fft.py
index 1557db97f..855a2e7cb 100644
--- a/hysop/numerics/fft/fftw_fft.py
+++ b/hysop/numerics/fft/fftw_fft.py
@@ -8,6 +8,7 @@ import warnings
 import pyfftw
 import numpy as np
 
+from hysop import __FFTW_NUM_THREADS__, __FFTW_PLANNER_EFFORT__, __FFTW_PLANNER_TIMELIMIT__
 from hysop.tools.types import first_not_None
 from hysop.tools.misc import prod
 from hysop.numerics.fft.fft import HysopFFTWarning, bytes2str
@@ -102,14 +103,17 @@ class FftwFFT(HostFFTI):
     Planning destroys initial arrays content.
     """
 
-    def __init__(self, threads=1, 
-                       planner_effort='FFTW_MEASURE', 
+    def __init__(self, threads=None, 
+                       planner_effort=None,
                        planning_timelimit=None,
                        destroy_input=False,
                        warn_on_allocation=True,
                        warn_on_misalignment=True,
                        backend=None, allocator=None,
                        **kwds):
+        threads            = first_not_None(threads,           __FFTW_NUM_THREADS__)
+        planner_effort     = first_not_None(planner_effort,    __FFTW_PLANNER_EFFORT__)
+        planning_timelimit = first_not_None(planner_timelimit, __FFTW_PLANNER_TIMELIMIT__)
         super(FftwFFT, self).__init__(backend=backend, allocator=allocator, 
                 warn_on_allocation=warn_on_allocation, **kwds)
         self.supported_ftypes = (np.float32, np.float64, np.longdouble)
diff --git a/hysop/numerics/fft/host_fft.py b/hysop/numerics/fft/host_fft.py
index 69222d79b..c675ec325 100644
--- a/hysop/numerics/fft/host_fft.py
+++ b/hysop/numerics/fft/host_fft.py
@@ -9,7 +9,7 @@ OpenCl backend base interface for fast Fourier Transforms.
 import psutil
 import numpy as np
 
-from hysop import __MAX_THREADS__
+from hysop import __FFTW_NUM_THREADS__, __FFTW_PLANNER_EFFORT__, __FFTW_PLANNER_TIMELIMIT__
 from hysop.tools.types import first_not_None, check_instance
 from hysop.backend.host.host_array_backend import HostArrayBackend
 from hysop.backend.host.host_array import HostArray
@@ -67,10 +67,14 @@ class HostFFTI(FFTI):
         Get the default host FFT interface which is a multithreaded FFTW interface with 
         ESTIMATE planning effort.
         """
-        threads = first_not_None(threads, __MAX_THREADS__)
+        threads            = first_not_None(threads,           __FFTW_NUM_THREADS__)
+        planner_effort     = first_not_None(planner_effort,    __FFTW_PLANNER_EFFORT__)
+        planning_timelimit = first_not_None(planner_timelimit, __FFTW_PLANNER_TIMELIMIT__)
         planner_effort = first_not_None(planner_effort, 'FFTW_ESTIMATE')
         from hysop.numerics.fft.fftw_fft import FftwFFT
-        return FftwFFT(threads=threads, 
+        return FftwFFT(threads=threads,  
+                       planner_effort=planner_effort, 
+                       planning_timelimit=planning_timelimit,
                        backend=backend, allocator=allocator,
                        planner_effort=planner_effort, 
                        planning_timelimit=planning_timelimit,
-- 
GitLab