diff --git a/hysop/backend/device/opencl/__init__.py b/hysop/backend/device/opencl/__init__.py index 44c87842d187f64336c91c35178ac6c0dddb9132..36f976e52cab2299ce1d8825c22e8a14b209fc9e 100644 --- a/hysop/backend/device/opencl/__init__.py +++ b/hysop/backend/device/opencl/__init__.py @@ -24,14 +24,14 @@ except ImportError: from pyopencl import _cl as cl_api -from hysop import __DEFAULT_PLATFORM_ID__, __DEFAULT_DEVICE_ID__ +from hysop import __DEFAULT_PLATFORM_ID__, __DEFAULT_DEVICE_ID__, __PROFILE__ from hysop.tools.io_utils import IO from hysop.backend.device import KERNEL_DUMP_FOLDER OPENCL_KERNEL_DUMP_FOLDER='{}/opencl'.format(KERNEL_DUMP_FOLDER) """Default opencl kernel dump folder.""" -__OPENCL_PROFILE__ = False +__OPENCL_PROFILE__ = __PROFILE__ """Boolean, true to enable OpenCL profiling events to time computations""" ## open cl underlying implementation diff --git a/hysop/backend/device/opencl/opencl_array_backend.py b/hysop/backend/device/opencl/opencl_array_backend.py index f034a71839fba775c0bf05fe02f9e470225ca6bc..e6dafbcf06ad04b7fa78de853e181bab52fadecd 100644 --- a/hysop/backend/device/opencl/opencl_array_backend.py +++ b/hysop/backend/device/opencl/opencl_array_backend.py @@ -63,11 +63,12 @@ class _ElementwiseKernel(object): self.kernel_args = kernel_args self.kernel_kwds = kernel_kwds self.default_queue = default_queue + self._apply_msg=' '+name+'<<<{}>>>()'.format(args[0].shape) def global_size_configured(self): return True def __call__(self, queue=None, wait_for=None, **kwds): if __KERNEL_DEBUG__: - print ' '+self._name+'<<<{}>>>()'.format(self.kernel_args[0].shape) + print self._apply_msg queue = first_not_None(queue, self.default_queue) self.kernel_kwds['queue'] = queue self.kernel_kwds['wait_for'] = wait_for @@ -142,11 +143,12 @@ class _ReductionKernel(object): self.kernel_args = kernel_args self.kernel_kwds = kernel_kwds self.default_queue = default_queue + self._apply_msg=' '+name+'<<<{}>>>()'.format(self.kernel_args[0].shape) def global_size_configured(self): return True def __call__(self, queue=None, wait_for=None, **kwds): if __KERNEL_DEBUG__: - print ' '+self._name+'<<<{}>>>()'.format(self.kernel_args[0].shape) + print self._apply_msg queue = first_not_None(queue, self.default_queue) self.kernel_kwds['queue'] = queue self.kernel_kwds['wait_for'] = wait_for diff --git a/hysop/backend/device/opencl/opencl_kernel_launcher.py b/hysop/backend/device/opencl/opencl_kernel_launcher.py index 97007b619c26349a45ba6fb7da45b7e8ed3e7e02..d5b122be2fa2f9e892cf8d5b911755309930bbeb 100644 --- a/hysop/backend/device/opencl/opencl_kernel_launcher.py +++ b/hysop/backend/device/opencl/opencl_kernel_launcher.py @@ -5,7 +5,7 @@ from hysop.deps import it, warnings from hysop.tools.decorators import debug from hysop.tools.types import check_instance, first_not_None from hysop.tools.numpywrappers import npw -from hysop.backend.device.opencl import cl +from hysop.backend.device.opencl import cl, __OPENCL_PROFILE__ from hysop.backend.device.opencl.opencl_kernel_statistics import OpenClKernelStatistics from hysop.tools.warning import HysopWarning @@ -35,6 +35,19 @@ class OpenClKernelListLauncher(object): self._parameters = {} self._apply_msg = '>OpenClKernelListLauncher {}'.format(name) + if __OPENCL_PROFILE__: + self.run_kernel = self._run_kernel_and_profile + else: + self.run_kernel = self._run_kernel + + def _run_kernel_and_profile(self, kernel, **kwds): + evt = self._run_kernel(kernel, **kwds) + evt.wait() + print '{} | {}'.format(evt.profile.end - evt.profile.start, str(getattr(kernel, '_apply_msg', type(kernel))).strip()) + return evt + def _run_kernel(self, kernel, **kwds): + return kernel.__call__(**kwds) + def push_copy_host_device(self, varname, src, dst, src_device_offset=None, dst_device_offset=None, byte_count=None): """Shortcut for OpenClCopyBuffer kernels creation.""" @@ -131,10 +144,10 @@ class OpenClKernelListLauncher(object): kernels = self._kernels if kernels: - evt = kernels[0].__call__(queue=queue, wait_for=wait_for, **kwds) + evt = self.run_kernel(kernels[0], queue=queue, wait_for=wait_for, **kwds) for kernel in kernels[1:]: try: - evt = kernel.__call__(queue=queue, **kwds) + evt = self.run_kernel(kernel, queue=queue, **kwds) except: msg='\nFailed to call kernel {} of type {}.\n' msg=msg.format(kernel.name,type(kernel).__name__) diff --git a/hysop/core/graph/continuous.py b/hysop/core/graph/continuous.py index e620e718e64747905772d63659d2a797e7872b60..e6e313cd220a1412eb3cc93c31f08a1ffbbe3019 100755 --- a/hysop/core/graph/continuous.py +++ b/hysop/core/graph/continuous.py @@ -85,7 +85,7 @@ class OperatorBase(TaggedObject): if not __PROFILE__: return self._profiler.summarize() - vprint(str(self._profiler)) + print(str(self._profiler)) def _set_io(self): """