diff --git a/examples/example_utils.py b/examples/example_utils.py index 11f7742a8f8555706e1e45348e7dd95129790838..b684387a4d076ce7d60f0c39f1f4a08cde78fa32 100644 --- a/examples/example_utils.py +++ b/examples/example_utils.py @@ -94,6 +94,8 @@ class HysopArgParser(argparse.ArgumentParser): os.environ[target] = '0' elif isinstance(value, str): os.environ[target] = value + elif isinstance(value, int): + os.environ[target] = str(value) else: msg='Invalid value of type {}.'.format(type(value)) raise TypeError(msg) @@ -148,7 +150,6 @@ class HysopArgParser(argparse.ArgumentParser): args = self.parse_args() args.__class__ = HysopNamespace - self._check_fft_args(args) self._check_threading_args(args) self._setup_hysop_env(args) @@ -400,6 +401,21 @@ class HysopArgParser(argparse.ArgumentParser): simu.add_argument('-lcfl', '--lagrangian-cfl', type=float, default=None, dest='lcfl', help='Specify LCFL for adaptive time stepping.') + simu.add_argument('-stopi', '--stop-at-initialization', default=False, action='store_true', + dest='stop_at_initialization', + help='Stop execution before problem initialization.') + simu.add_argument('-stopd', '--stop-at-discretization', default=False, action='store_true', + dest='stop_at_discretization', + help='Stop execution before problem discretization.') + simu.add_argument('-stopwp', '--stop-at-work-properties', default=False, action='store_true', + dest='stop_at_work_properties', + help='Stop execution before problem work properties retrieval.') + simu.add_argument('-stopwa', '--stop-at-work-allocation', default=False, action='store_true', + dest='stop_at_work_allocation', + help='Stop execution before problem work properties allocation (allocation of temporary buffers).') + simu.add_argument('-stops', '--stop-at-setup', default=False, action='store_true', + dest='stop_at_setup', + help='Stop execution before problem setup.') simu.add_argument('-dr', '--dry-run', default=False, action='store_true', dest='dry_run', help='Stop execution 
before the first simulation iteration.') @@ -409,7 +425,8 @@ class HysopArgParser(argparse.ArgumentParser): self._check_default(args, ('tstart', 'tend'), float) self._check_default(args, 'dt', float, allow_none=True) self._check_default(args, 'nb_iter', int, allow_none=True) - self._check_default(args, 'dry_run', bool, allow_none=False) + self._check_default(args, ('dry_run', 'stop_at_initialization', 'stop_at_discretization', 'stop_at_setup', + 'stop_at_work_properties', 'stop_at_work_allocation'), bool, allow_none=False) self._check_positive(args, 'dt', strict=True, allow_none=True) self._check_positive(args, 'nb_iter', strict=True, allow_none=True) self._check_positive(args, 'max_iter', strict=True, allow_none=True) @@ -481,13 +498,13 @@ class HysopArgParser(argparse.ArgumentParser): threading = self.add_argument_group('threading parameters') msg = "Enable threads for backends that supports it (Numba and FFTW) by setting HYSOP_ENABLE_THREADS. " msg += "Disabling threading will limit all threading backends to one thread and set numba default backend to 'cpu' instead of 'parallel'." - threading.add_argument('--enable-threading', type=str, default=True, + threading.add_argument('--enable-threading', type=str, default='1', dest='enable_threading', help=msg) - msg="If this parameter is set to 'physical', the maximum number of threads will be set to the number of physical cores available to the process (taking into account the cpuset). " - msg+="If set to 'logical', logical cores will be chosen instead. Else this parameter expects a positive integer." - msg+='Set the default maximum usable threads for threading backends (OpenMP, MKL) and operator backends using threads (Numba, FFTW). ' + msg='Set the default maximum usable threads for threading backends (OpenMP, MKL) and operator backends using threads (Numba, FFTW). ' msg+='This parameter will set HYSOP_MAX_THREADS and does not affect the OpenCL backend.' 
+ msg+="If this parameter is set to 'physical', the maximum number of threads will be set to the number of physical cores available to the process (taking into account the cpuset). " + msg+="If set to 'logical', logical cores will be chosen instead. Else this parameter expects a positive integer." msg+='If --enable-threads is set to False, this parameter is ignored and HYSOP_MAX_THREADS will be set to 1.' threading.add_argument('--max-threads', type=str, default='physical', dest='max_threads', @@ -498,15 +515,21 @@ class HysopArgParser(argparse.ArgumentParser): threading.add_argument('--mkl-threads', type=str, default=None, dest='mkl_threads', help='This parameter will set MKL_NUM_THREADS to a custom value (overrides --max-threads).') - threading.add_argument('--fftw-threads', type=str, default=None, - dest='fftw_threads', - help='This parameter will set FFTW_NUM_THREADS to a custom value (overrides --max-threads).') threading.add_argument('--numba-threads', type=str, default=None, dest='numba_threads', help='This parameter will set NUMBA_NUM_THREADS to a custom value (overrides --max-threads).') threading.add_argument('--numba-threading-layer', type=str, default='workqueue', dest='numba_threading_layer', help="This parameter will set NUMBA_THREADING_LAYER to a custom value ('workqueue' is available on all platforms, but not 'omp' and 'tbb'). Use 'numba -s' to list available numba threading layers.") + threading.add_argument('--fftw-threads', type=str, default=None, + dest='fftw_threads', + help='This parameter will set HYSOP_FFTW_NUM_THREADS to a custom value (overrides --max-threads).') + threading.add_argument('--fftw-planner-effort', type=str, default='FFTW_ESTIMATE', + dest='fftw_planner_effort', + help='Set default planning effort for FFTW plans. The actual number of threads used by FFTW may depend on the planning step. 
This parameter will set HYSOP_FFTW_PLANNER_EFFORT.') + threading.add_argument('--fftw-planner-timelimit', type=str, default='FFTW_NO_TIMELIMIT', + dest='fftw_planner_timelimit', + help='Set an approximate upper bound in seconds for FFTW planning. This parameter will set HYSOP_FFTW_PLANNER_TIMELIMIT.') return threading @@ -548,8 +571,10 @@ class HysopArgParser(argparse.ArgumentParser): return opencl def _check_threading_args(self, args): - self._check_default(args, ('enable_threading', 'max_threads', 'numba_threading_layer'), str, allow_none=False) - self._check_default(args, ('openmp_threads', 'mkl_threads', 'numba_threads'), str, allow_none=True) + self._check_default(args, ('enable_threading', 'max_threads', 'numba_threading_layer', + 'fftw_planner_effort', 'fftw_planner_timelimit'), str, allow_none=False) + self._check_default(args, ('openmp_threads', 'mkl_threads', 'numba_threads', 'fftw_threads'), + str, allow_none=True) args.enable_threading = self._convert_bool('enable_threading', args.enable_threading) if args.enable_threading: @@ -557,9 +582,12 @@ class HysopArgParser(argparse.ArgumentParser): else: args.max_threads = 1 for argname in ('openmp_threads', 'mkl_threads', 'numba_threads', 'fftw_threads'): - setattr(args, argname, self._convert_threads(argname, getattr(args, argname), default=args.max_threads)) - - args.numba_threading_layer = self._convert_numba_threading_layer('numba_threading_layer', args.numba_threading_layer) + setattr(args, argname, self._convert_threads(argname, getattr(args, argname), + default=args.max_threads)) + args.numba_threading_layer = self._convert_numba_threading_layer('numba_threading_layer', + args.numba_threading_layer) + args.fftw_planner_effort = self._convert_fftw_planner_effort('fftw_planner_effort', + args.fftw_planner_effort) def _check_opencl_args(self, args): self._check_default(args, ('cl_platform_id', 'cl_device_id'), int, allow_none=True) @@ -974,17 +1002,17 @@ class HysopArgParser(argparse.ArgumentParser): 
msg='Uknown tracing module \'{}\'.'.format(module) self.error(msg) - set_env('ENABLE_THREADING', args.enable_threading) - set_env('MAX_THREADS', args.max_threads) - set_env('OMP_NUM_THREADS', args.openmp_threads, False) - set_env('MKL_NUM_THREADS', args.mkl_threads, False) - set_env('NUMBA_NUM_THREADS', args.numba_threads, False) - set_env('NUMBA_THREADING_LAYER', args.numba_threading_layer, False - set_env('FFTW_NUM_THREADS', args.fftw_threads, False) - if (args.fftw_planner_effort is not None): - set_env('FFTW_PLANNER_EFFORT', args.fftw_planner_effort, False) - if (args.fftw_planner_timelimite is not None): - set_env('FFTW_PLANNER_TIMELIMIT', args.fftw_planner_timelimite, False) + self.set_env('ENABLE_THREADING', args.enable_threading, True) + self.set_env('MAX_THREADS', args.max_threads, True) + self.set_env('FFTW_NUM_THREADS', args.fftw_threads, True) + self.set_env('FFTW_PLANNER_EFFORT', args.fftw_planner_effort, True) + self.set_env('FFTW_PLANNER_TIMELIMIT', args.fftw_planner_timelimit, True) + + # those environment variables are not part of HySoP + self.set_env('OMP_NUM_THREADS', args.openmp_threads, False) + self.set_env('MKL_NUM_THREADS', args.mkl_threads, False) + self.set_env('NUMBA_NUM_THREADS', args.numba_threads, False) + self.set_env('NUMBA_THREADING_LAYER', args.numba_threading_layer, False) def _setup_parameters(self, args): pass @@ -1054,6 +1082,19 @@ class HysopArgParser(argparse.ArgumentParser): msg = msg.format(argname, val) self.error(msg) return val + + def _convert_fftw_planner_effort(self, argname, val): + values = { + 'FFTW_ESTIMATE': 'FFTW_ESTIMATE', + 'FFTW_MEASURE': 'FFTW_MEASURE', + 'FFTW_PATIENT': 'FFTW_PATIENT', + 'FFTW_EXHAUSTIVE': 'FFTW_EXHAUSTIVE', + 'fftw_estimate': 'FFTW_ESTIMATE', + 'fftw_measure': 'FFTW_MEASURE', + 'fftw_patient': 'FFTW_PATIENT', + 'fftw_exhaustive': 'FFTW_EXHAUSTIVE', + } + return self._check_convert(argname, val, values) def _convert_numba_threading_layer(self, argname, val): values = { diff --git
a/examples/particles_above_salt/particles_above_salt_bc.py b/examples/particles_above_salt/particles_above_salt_bc.py index 456bd7166c0b6ed6b0f744481d32af2badb1147f..9b1cb78b7fa2a9ff2b47ce259c780eb5881066cc 100644 --- a/examples/particles_above_salt/particles_above_salt_bc.py +++ b/examples/particles_above_salt/particles_above_salt_bc.py @@ -295,7 +295,7 @@ def compute(args): compute_mean_fields, min_max_U, min_max_W, adapt_dt) - problem.build() + problem.build(args) # If a visu_rank was provided, and show_graph was set, # display the graph on the given process rank. diff --git a/hysop/__init__.py.in b/hysop/__init__.py.in index 61931fee295e5c13d9ff5a3948186a78b0e0de0d..fa99e48793562d7372ba4e9afd05373752840a56 100644 --- a/hysop/__init__.py.in +++ b/hysop/__init__.py.in @@ -38,11 +38,11 @@ __VERBOSE__ = get_env('VERBOSE', ("@VERBOSE@" is "ON")) __DEBUG__ = get_env('DEBUG', ("@DEBUG@" is "ON")) __PROFILE__ = get_env('PROFILE', ("@PROFILE@" is "ON")) -__TRACE_CALLS__ = get_env('TRACE_CALLS', False) -__TRACE_WARNINGS__ = get_env('TRACE_WARNINGS', False) +__TRACE_CALLS__ = get_env('TRACE_CALLS', False) +__TRACE_WARNINGS__ = get_env('TRACE_WARNINGS', False) __TRACE_MEMALLOCS__ = get_env('TRACE_MEMALLOC', False) -__TRACE_KERNELS__ = get_env('TRACE_KERNELS', False) -__KERNEL_DEBUG__ = get_env('KERNEL_DEBUG', False) +__TRACE_KERNELS__ = get_env('TRACE_KERNELS', False) +__KERNEL_DEBUG__ = get_env('KERNEL_DEBUG', False) __BACKTRACE_BIG_MEMALLOCS__ = get_env('BACKTRACE_BIG_MEMALLOCS', False) __TEST_ALL_OPENCL_PLATFORMS__ = get_env('TEST_ALL_OPENCL_PLATFORMS', False) @@ -60,7 +60,7 @@ __DEFAULT_NUMBA_TARGET__ = ('parallel' if __ENABLE_THREADING__ else 'cpu') # FFTW __FFTW_NUM_THREADS__ = get_env('FFTW_NUM_THREADS', __MAX_THREADS__) __FFTW_PLANNER_EFFORT__ = get_env('FFTW_PLANNER_EFFORT', 'FFTW_ESTIMATE') -__FFTW_PLANNER_TIMELIMIT__ = get_env('FFTW_PLANNER_TIMELIMIT', None) +__FFTW_PLANNER_TIMELIMIT__ = get_env('FFTW_PLANNER_TIMELIMIT', 'FFTW_NO_TIMELIMIT') # OpenCL 
__DEFAULT_PLATFORM_ID__ = int(get_env('DEFAULT_PLATFORM_ID', @OPENCL_DEFAULT_OPENCL_PLATFORM_ID@)) @@ -171,12 +171,20 @@ msg_threads = \ NUMBA_THREADING_LAYER: {} NUMBA_NUM_THREADS: {} -------------------------------- + FFTW_NUM_THREADS: {} + FFTW_PLANNER_EFFORT: {} + FFTW_PLANNER_TIMELIMIT: {} + -------------------------------- '''.format( - __ENABLE_THREADING__, __MAX_THREADS__, + __ENABLE_THREADING__, + __MAX_THREADS__, os.environ['OMP_NUM_THREADS'], os.environ['MKL_NUM_THREADS'], __DEFAULT_NUMBA_TARGET__, os.environ['NUMBA_THREADING_LAYER'], - os.environ['NUMBA_NUM_THREADS']) + os.environ['NUMBA_NUM_THREADS'], + __FFTW_NUM_THREADS__, + __FFTW_PLANNER_EFFORT__, + __FFTW_PLANNER_TIMELIMIT__) mprint(msg_threads) diff --git a/hysop/backend/device/opencl/clpeak.py b/hysop/backend/device/opencl/clpeak.py index 657305c8a9d31b49d957af2f0c4e413de0010720..1cbba49a1b34c66119a67918164e8f8ef94324ae 100644 --- a/hysop/backend/device/opencl/clpeak.py +++ b/hysop/backend/device/opencl/clpeak.py @@ -14,7 +14,7 @@ from hysop.backend.hardware.hwinfo import HardwareStatistics class ClPeakInfo(object): __FNULL = open(os.devnull, 'w') - __CMD_TIME_OUT = 30 # 30s timeout for clpeak calls + __CMD_TIME_OUT = 60 # 60s timeout for clpeak calls __clpeak_bandwidth_units = { 'bps': 1e00, 'kbps': 1e03, diff --git a/hysop/problem.py b/hysop/problem.py index 094d16b7cd6642d5cfe4baf74a69f3999d5ee101..44269d90ee9cba66e062df76ba7cc29eba098173 100644 --- a/hysop/problem.py +++ b/hysop/problem.py @@ -1,5 +1,5 @@ -import datetime +import datetime, sys from hysop.constants import Backend, MemoryOrdering from hysop.tools.string_utils import vprint_banner from hysop.tools.contexts import Timer @@ -19,22 +19,43 @@ class Problem(ComputationalGraph): self.push_nodes(*ops) @debug - def build(self, allow_subbuffers=False): + def build(self, args=None, allow_subbuffers=False): with Timer() as tm: - vprint('\nInitializing problem...') - self.initialize(outputs_are_inputs=True, topgraph_method=None) - 
vprint('\nDiscretizing problem...') - self.discretize() - vprint('\nGetting work properties...') - work = self.get_work_properties() - vprint('\nAllocating work...') - work.allocate(allow_subbuffers=allow_subbuffers) - vprint('\nSetting up problem...') - self.setup(work) + msg = self.build_problem(args=args, allow_subbuffers=allow_subbuffers) + if msg: + msg='Problem {} achieved, exiting !'.format(msg) + vprint_banner(msg) + sys.exit(0) msg=' Problem building took {} ({}s) ' msg=msg.format(datetime.timedelta(seconds=round(tm.interval)), tm.interval) vprint_banner(msg, spacing=True, at_border=2) + + def build_problem(self, args, allow_subbuffers): + if (args is not None) and args.stop_at_initialization: + return 'initialization' + vprint('\nInitializing problem...') + self.initialize(outputs_are_inputs=True, topgraph_method=None) + + if (args is not None) and args.stop_at_discretization: + return 'discretization' + vprint('\nDiscretizing problem...') + self.discretize() + + if (args is not None) and args.stop_at_work_properties: + return 'work properties retrieval' + vprint('\nGetting work properties...') + work = self.get_work_properties() + + if (args is not None) and args.stop_at_work_allocation: + return 'work allocation' + vprint('\nAllocating work...') + work.allocate(allow_subbuffers=allow_subbuffers) + + if (args is not None) and args.stop_at_setup: + return 'setup' + vprint('\nSetting up problem...') + self.setup(work) def discretize(self): super(Problem, self).discretize()