diff --git a/hysop/__init__.py.in b/hysop/__init__.py.in
index 8ccdf4416a6cbdd3bc4653e86f2e5f9ef0c093b3..b500ade80084c4a4650a55127914c81982d4f6ae 100644
--- a/hysop/__init__.py.in
+++ b/hysop/__init__.py.in
@@ -2,6 +2,7 @@
 on hybrid architectures (MPI-GPU)
 """
+import psutil
 from functools import wraps
 
 from hysop.deps import __builtin__, print_function, os, sys, warnings, traceback
@@ -43,6 +44,11 @@ __BACKTRACE_BIG_MEMALLOCS__ = get_env('BACKTRACE_BIG_MEMALLOCS', False)
 __TEST_ALL_OPENCL_PLATFORMS__ = get_env('TEST_ALL_OPENCL_PLATFORMS', False)
 __ENABLE_LONG_TESTS__ = get_env('ENABLE_LONG_TESTS', ("@ENABLE_LONG_TESTS@" is "ON"))
 
+# Threads
+__ENABLE_THREADING__ = get_env('ENABLE_THREADING', True)
+__MAX_THREADS__ = int(get_env('MAX_THREADS', psutil.cpu_count(logical=False))
+                      if __ENABLE_THREADING__ else 1)
+
 # OpenCL
 __DEFAULT_PLATFORM_ID__ = int(get_env('DEFAULT_PLATFORM_ID', @OPENCL_DEFAULT_OPENCL_PLATFORM_ID@))
 __DEFAULT_DEVICE_ID__ = int(get_env('DEFAULT_DEVICE_ID', @OPENCL_DEFAULT_OPENCL_DEVICE_ID@))
diff --git a/hysop/backend/host/host_operator.py b/hysop/backend/host/host_operator.py
index 38c8748b7d33d1ddfb1e9e7cb04bf300207e70fa..1549ee981364c2b4eed5c1636f2838e95bc13cd2 100644
--- a/hysop/backend/host/host_operator.py
+++ b/hysop/backend/host/host_operator.py
@@ -6,9 +6,14 @@
 discrete operators working on the Host backend.
 opencl backend.
 """
 from abc import ABCMeta
+import functools
+from contextlib import contextmanager
 from hysop.tools.decorators import debug
+from hysop.tools.types import check_instance, first_not_None
 from hysop.constants import ComputeGranularity, Backend
 from hysop.core.graph.computational_operator import ComputationalGraphOperator
+from hysop.topology.topology_descriptor import TopologyDescriptor
+
 
 class HostOperator(ComputationalGraphOperator):
     """
@@ -34,10 +38,188 @@ class HostOperator(ComputationalGraphOperator):
         return set([Backend.HOST])
 
 
+class OpenClMappable(object):
+    """
+    Extend host operator capabilities to work on mapped opencl buffers
+    """
+
+    class OpenClMappedMemoryObjectGetter(object):
+        def __init__(self, obj, evt):
+            check_instance(obj, OpenClMappable)
+            self.__obj = obj
+            self.__evt = evt
+        def __getitem__(self, key):
+            return self.__obj.get_mapped_object(key=key)
+        @property
+        def evt(self):
+            return self.__evt
 
     @classmethod
     def supported_backends(cls):
         sb = super(OpenClMappable, cls).supported_backends()
         sb.add(Backend.OPENCL)
         return sb
+
+    @debug
+    def create_topology_descriptors(self):
+        if self.enable_opencl_host_buffer_mapping:
+            # enforce opencl topology on host operator
+            for (field, topo_descriptor) in self.input_fields.iteritems():
+                topo_descriptor = TopologyDescriptor.build_descriptor(
+                        backend=Backend.OPENCL,
+                        operator=self,
+                        field=field,
+                        handle=topo_descriptor,
+                        cl_env=self.cl_env)
+                self.input_fields[field] = topo_descriptor
+
+            for (field, topo_descriptor) in self.output_fields.iteritems():
+                topo_descriptor = TopologyDescriptor.build_descriptor(
+                        backend=Backend.OPENCL,
+                        operator=self,
+                        field=field,
+                        handle=topo_descriptor,
+                        cl_env=self.cl_env)
+                self.output_fields[field] = topo_descriptor
+        else:
+            super(OpenClMappable, self).create_topology_descriptors()
+
+    def __init__(self, cl_env=None, enable_opencl_host_buffer_mapping=False, **kwds):
+        super(OpenClMappable, self).__init__(**kwds)
+
+        if enable_opencl_host_buffer_mapping:
+            assert isinstance(self, HostOperator)
+
+        self.__cl_env = cl_env
+        self.__enable_opencl_host_buffer_mapping = enable_opencl_host_buffer_mapping
+
+        self.__mapped = False
+        self.__registered_objects = {}
+        self.__registered_getters = {}
+        self.__mapped_objects = {}
+
+    @property
+    def cl_env(self):
+        return self.__cl_env
+
+    @property
+    def enable_opencl_host_buffer_mapping(self):
+        return self.__enable_opencl_host_buffer_mapping
+
+    def setup(self, **kwds):
+        super(OpenClMappable, self).setup(**kwds)
+        self._register_fields()
+
+    def _register_fields(self):
+        from hysop.fields.discrete_field import DiscreteScalarField, DiscreteScalarFieldView
+        ivfields = set(filter(lambda f: f.backend.kind==Backend.OPENCL, self.input_discrete_fields.values()))
+        ovfields = set(filter(lambda f: f.backend.kind==Backend.OPENCL, self.output_discrete_fields.values()))
+        check_instance(ivfields, set, values=DiscreteScalarFieldView)
+        check_instance(ovfields, set, values=DiscreteScalarFieldView)
+        vfields = ivfields.union(ovfields)
+        if vfields:
+            assert (self.cl_env is not None), 'No opencl environment has been given.'
+            from hysop.backend.device.opencl.opencl_env import OpenClEnvironment
+            check_instance(self.cl_env, OpenClEnvironment)
+
+        from hysop.backend.device.opencl import cl
+        ifields = set(f.dfield for f in ivfields)
+        ofields = set(f.dfield for f in ovfields)
+        check_instance(ifields, set, values=DiscreteScalarField)
+        check_instance(ofields, set, values=DiscreteScalarField)
+        fields = ifields.union(ofields)
+        for field in fields:
+            flags = 0
+            if field in ifields:
+                flags |= cl.map_flags.READ
+            if field in ofields:
+                flags |= cl.map_flags.WRITE
+            assert (field._data is not None)
+            self.register_mappable_object(key=field, obj=field._data.handle,
+                                          flags=flags)
+        for vfield in vfields:
+            self.register_data_getter(get_key=vfield, obj_key=vfield.dfield,
+                                      getter=vfield._compute_data_view)
+
+    def register_mappable_object(self, key, obj, flags):
+        from hysop.backend.device.opencl import clArray
+        msg='Device memory object "{}" has already been registered.'
+        msg=msg.format(key)
+        assert (key not in self.__registered_objects), msg
+        check_instance(obj, clArray.Array)
+        self.__registered_objects[key] = (obj, flags)
+
+    def register_data_getter(self, get_key, obj_key, getter):
+        assert callable(getter)
+        msg='Device memory getter "{}" has already been registered as an object.'
+        msg=msg.format(get_key)
+        assert (get_key not in self.__registered_objects), msg
+        msg='Device memory getter "{}" has already been registered as a getter.'
+        msg=msg.format(get_key)
+        assert (get_key not in self.__registered_getters), msg
+        msg='Device memory object "{}" has not been registered.'
+        msg=msg.format(obj_key)
+        assert (obj_key in self.__registered_objects), msg
+        self.__registered_getters[get_key] = (obj_key, getter)
+
+    def map_objects(self, queue, is_blocking):
+        DEBUG=False
+        msg='Device memory objects have already been mapped to host.'
+        assert not self.__mapped, msg
+        evt = None
+        for (obj_key, (dev_buf, flags)) in self.__registered_objects.iteritems():
+            if DEBUG:
+                msg='Mapping {}...'.format(obj_key.full_tag)
+                print msg
+            if is_blocking:
+                host_buf = dev_buf.map_to_host(queue=queue, is_blocking=is_blocking, flags=flags)
+            else:
+                host_buf, evt = dev_buf.map_to_host(queue=queue, is_blocking=is_blocking, flags=flags)
+            self.__mapped_objects[obj_key] = host_buf
+        for (get_key, (obj_key, getter)) in self.__registered_getters.iteritems():
+            if DEBUG:
+                msg='Applying getter {} to mapped buffer {}...'.format(get_key.full_tag, obj_key.full_tag)
+                print msg
+            self.__mapped_objects[get_key] = getter(self.__mapped_objects[obj_key])
+        self.__mapped = True
+        return evt
+
+    def unmap_objects(self):
+        msg='Device memory objects have already been unmapped from host.'
+        assert self.__mapped, msg
+        self.__mapped_objects.clear()
+        self.__mapped = False
+
+    def get_mapped_object(self, key):
+        msg='Device memory objects have not been mapped to host yet.'
+        assert self.__mapped, msg
+        msg='Device memory object "{}" has not been mapped.'
+        msg=msg.format(key)
+        assert key in self.__mapped_objects, msg
+        return self.__mapped_objects[key]
+
+    def build_object_getter(self, key):
+        msg='Device memory object "{}" has not been registered.'
+        msg=msg.format(key)
+        assert key in self.__registered_objects, msg
+        return functools.partial(self.get_mapped_object, key=key)
+
+    @contextmanager
+    def map_objects_to_host(self, queue=None, is_blocking=True):
+        if self.__registered_objects:
+            assert (self.cl_env is not None)
+            queue = first_not_None(queue, self.cl_env.default_queue)
+            try:
+                evt = self.map_objects(queue, is_blocking)
+                yield self.OpenClMappedMemoryObjectGetter(self, evt)
+            except:
+                raise
+            finally:
+                self.unmap_objects()
+        else:
+            try:
+                yield
+            except:
+                raise
+
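Usage sketch (illustration only, not part of the patch): how a host operator that mixes in the OpenClMappable class added above is expected to consume mapped OpenCL buffers from its apply() method. It mirrors the PythonDiffusion.apply() change further down in this diff; the operator name is hypothetical and the forward_transforms/backward_transforms attributes are assumed to come from a SpectralOperatorBase-style base class.

    from hysop.backend.host.host_operator import HostOperator, OpenClMappable

    class MappedSpectralOperator(OpenClMappable, HostOperator):
        # Hypothetical operator: OpenClMappable is listed before HostOperator so that
        # supported_backends() and create_topology_descriptors() pick up the OpenCL
        # buffer-mapping behaviour defined above.
        def apply(self, **kwds):
            super(MappedSpectralOperator, self).apply(**kwds)
            # Inside the with-block, every OpenCL buffer registered by _register_fields()
            # is mapped to host memory and the registered getters are applied, so the
            # FFT plans read and write plain host views of the device data.
            with self.map_objects_to_host(is_blocking=True):
                for (Ft, Bt) in zip(self.forward_transforms, self.backward_transforms):
                    Ft()
                    Bt()
            # Buffers are unmapped automatically when the block exits (unmap_objects()).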
diff --git a/hysop/backend/host/python/operator/curl.py b/hysop/backend/host/python/operator/curl.py
index 7a14ccab955c03bdea97c294692c416bac6418cd..25aa3a3e70d87dde42a4240981296903452a4d32 100644
--- a/hysop/backend/host/python/operator/curl.py
+++ b/hysop/backend/host/python/operator/curl.py
@@ -1,6 +1,7 @@
 import functools
 import numba as nb
+from hysop import __DEFAULT_NUMBA_TARGET__
 from hysop.tools.types import check_instance, first_not_None
 from hysop.tools.decorators import debug
 from hysop.tools.numpywrappers import npw
@@ -16,7 +17,7 @@
        nb.void(nb.complex128[:,::-1], nb.float64[::-1],    nb.complex128[:,::-1]),
        nb.void(nb.complex128[:,::-1], nb.complex128[::-1], nb.complex128[:,::-1]),
     ], '(n,m),(m)->(n,m)',
-    target='cpu', nopython=True, cache=True)
+    target=__DEFAULT_NUMBA_TARGET__, nopython=True, cache=True)
 def filter_curl_2d__0(Fin, K, Fout):
     for i in range(0, Fin.shape[0]):
         for j in range(0, Fin.shape[1]):
@@ -30,7 +31,7 @@
        nb.void(nb.complex128[:,::-1], nb.float64[::-1],    nb.complex128[:,::-1]),
        nb.void(nb.complex128[:,::-1], nb.complex128[::-1], nb.complex128[:,::-1]),
     ], '(n,m),(m)->(n,m)',
-    target='cpu', nopython=True, cache=True)
+    target=__DEFAULT_NUMBA_TARGET__, nopython=True, cache=True)
 def filter_curl_2d__1(Fin, K, Fout):
     for i in range(0, Fin.shape[0]):
         for j in range(0, Fin.shape[1]):
@@ -45,7 +46,7 @@
        nb.void(nb.complex128[:,:,::-1], nb.float64[::-1],    nb.complex128[:,:,::-1]),
        nb.void(nb.complex128[:,:,::-1], nb.complex128[::-1], nb.complex128[:,:,::-1]),
     ], '(n,m,p),(p)->(n,m,p)',
-    target='cpu', nopython=True, cache=True)
+    target=__DEFAULT_NUMBA_TARGET__, nopython=True, cache=True)
 def filter_curl_3d__0(Fin, K, Fout):
     for i in range(0, Fin.shape[0]):
         for j in range(0, Fin.shape[1]):
@@ -60,7 +61,7 @@
        nb.void(nb.complex128[:,:,::-1], nb.float64[::-1],    nb.complex128[:,:,::-1]),
        nb.void(nb.complex128[:,:,::-1], nb.complex128[::-1], nb.complex128[:,:,::-1]),
     ], '(n,m,p),(p)->(n,m,p)',
-    target='cpu', nopython=True, cache=True)
+    target=__DEFAULT_NUMBA_TARGET__, nopython=True, cache=True)
 def filter_curl_3d__1(Fin, K, Fout):
     for i in range(0, Fin.shape[0]):
         for j in range(0, Fin.shape[1]):
diff --git a/hysop/backend/host/python/operator/diffusion.py b/hysop/backend/host/python/operator/diffusion.py
index b84ffcf06137fb4960a50386472274afe8bddb42..26ef4fd6601d7258aa466a0a87ffee71f26f169a 100644
--- a/hysop/backend/host/python/operator/diffusion.py
+++ b/hysop/backend/host/python/operator/diffusion.py
@@ -1,6 +1,7 @@
 import functools
 import numba as nb
+from hysop import __DEFAULT_NUMBA_TARGET__
 from hysop.constants import Backend
 from hysop.tools.types import check_instance, first_not_None
 from hysop.tools.decorators import debug
@@ -22,7 +23,7 @@ class PythonDiffusion(DiffusionOperatorBase, OpenClMappable, HostOperator):
 
     @classmethod
     def build_diffusion_filter(cls, dim, *args, **kwds):
-        target = kwds.get('target', 'cpu')
+        target = kwds.get('target', __DEFAULT_NUMBA_TARGET__)
         assert len(args) == 1 + dim
         dtype = args[0].dtype
         if is_complex(dtype):
@@ -87,14 +88,15 @@ class PythonDiffusion(DiffusionOperatorBase, OpenClMappable, HostOperator):
         super(PythonDiffusion, self).apply(**kwds)
 
         nu_dt = self.nu() * self.dt()
-
-        for (Fo,Ft,Bt,filter_diffusion) in zip(self.dFout.dfields,
-                                               self.forward_transforms,
-                                               self.backward_transforms,
-                                               self.diffusion_filters):
-            Ft()
-            filter_diffusion(nu_dt, Ft.output_buffer)
-            Bt()
+
+        with self.map_objects_to_host():
+            for (Ft,Bt,filter_diffusion) in zip(self.forward_transforms,
+                                                self.backward_transforms,
+                                                self.diffusion_filters):
+                Ft()
+                filter_diffusion(nu_dt, Ft.output_buffer)
+                Bt()
+
         for Fo in self.dFout.dfields:
             Fo.exchange_ghosts()
diff --git a/hysop/backend/host/python/operator/poisson.py b/hysop/backend/host/python/operator/poisson.py
index d5440ce9f14250cd47ee7674221c942a29821731..a88d10f187112db010186fc5ab9b3881c7902b3c 100644
--- a/hysop/backend/host/python/operator/poisson.py
+++ b/hysop/backend/host/python/operator/poisson.py
@@ -2,6 +2,7 @@
 import functools
 import numba as nb
+from hysop import __DEFAULT_NUMBA_TARGET__
 from hysop.tools.types import check_instance, first_not_None
 from hysop.tools.decorators import debug
 from hysop.tools.numpywrappers import npw
@@ -20,7 +21,7 @@ class PythonPoisson(PoissonOperatorBase, HostOperator):
 
     @classmethod
     def build_poisson_filter(cls, dim, *args, **kwds):
-        target = kwds.get('target', 'cpu')
+        target = kwds.get('target', __DEFAULT_NUMBA_TARGET__)
         assert len(args) == 2 + dim
         signature, _ = make_numba_signature(*args)
         if (dim==1):
diff --git a/hysop/backend/host/python/operator/poisson_curl.py b/hysop/backend/host/python/operator/poisson_curl.py
index bd3868b3bcf7cf125bf98f3c9be91d9f70db150a..138f8dc5122bb7c435fd78daa1baa2aa020be448 100644
--- a/hysop/backend/host/python/operator/poisson_curl.py
+++ b/hysop/backend/host/python/operator/poisson_curl.py
@@ -1,6 +1,7 @@
 import functools
 import numba as nb
+from hysop import __DEFAULT_NUMBA_TARGET__
 from hysop.tools.types import check_instance, first_not_None
 from hysop.tools.decorators import debug
 from hysop.tools.numpywrappers import npw
@@ -20,7 +21,7 @@ class PythonPoissonCurl(SpectralPoissonCurlOperatorBase, HostOperator):
     """
 
     @classmethod
-    def build_filter_curl_2d__0_m(cls, FIN, K, FOUT, target='cpu'):
+    def build_filter_curl_2d__0_m(cls, FIN, K, FOUT, target=__DEFAULT_NUMBA_TARGET__):
         args=(FIN,K,FOUT)
         signature, _ = make_numba_signature(*args)
         layout = '(n,m),(m)->(n,m)'
@@ -34,7 +35,7 @@ class PythonPoissonCurl(SpectralPoissonCurlOperatorBase, HostOperator):
 
     @classmethod
-    def build_filter_curl_2d__1_n(cls, FIN, K, FOUT, target='cpu'):
+    def build_filter_curl_2d__1_n(cls, FIN, K, FOUT, target=__DEFAULT_NUMBA_TARGET__):
         args=(FIN,K,FOUT)
         signature, _ = make_numba_signature(*args)
         layout = '(n,m),(n)->(n,m)'
@@ -47,7 +48,7 @@ class PythonPoissonCurl(SpectralPoissonCurlOperatorBase, HostOperator):
         return functools.partial(filter_curl_2d__1_n, *args)
 
     @classmethod
-    def build_filter_curl_3d__0_n(cls, FIN, K, FOUT, target='cpu'):
+    def build_filter_curl_3d__0_n(cls, FIN, K, FOUT, target=__DEFAULT_NUMBA_TARGET__):
         args=(FIN,K,FOUT)
         signature, _ = make_numba_signature(*args)
         layout='(n,m,p),(n)->(n,m,p)'
@@ -61,7 +62,7 @@ class PythonPoissonCurl(SpectralPoissonCurlOperatorBase, HostOperator):
         return functools.partial(filter_curl_3d__0_n, *args)
 
     @classmethod
-    def build_filter_curl_3d__0_m(cls, FIN, K, FOUT, target='cpu'):
+    def build_filter_curl_3d__0_m(cls, FIN, K, FOUT, target=__DEFAULT_NUMBA_TARGET__):
         args=(FIN,K,FOUT)
         signature, _ = make_numba_signature(*args)
         layout='(n,m,p),(m)->(n,m,p)'
@@ -75,7 +76,7 @@ class PythonPoissonCurl(SpectralPoissonCurlOperatorBase, HostOperator):
         return functools.partial(filter_curl_3d__0_m, *args)
 
     @classmethod
-    def build_filter_curl_3d__0_p(cls, FIN, K, FOUT, target='cpu'):
+    def build_filter_curl_3d__0_p(cls, FIN, K, FOUT, target=__DEFAULT_NUMBA_TARGET__):
         args=(FIN,K,FOUT)
         signature, _ = make_numba_signature(*args)
         layout='(n,m,p),(p)->(n,m,p)'
@@ -89,7 +90,7 @@ class PythonPoissonCurl(SpectralPoissonCurlOperatorBase, HostOperator):
         return functools.partial(filter_curl_3d__0_p, *args)
 
     @classmethod
-    def build_filter_curl_3d__1_n(cls, FIN, K, FOUT, target='cpu'):
+    def build_filter_curl_3d__1_n(cls, FIN, K, FOUT, target=__DEFAULT_NUMBA_TARGET__):
         args=(FIN,K,FOUT)
         signature, _ = make_numba_signature(*args)
         layout='(n,m,p),(n)->(n,m,p)'
@@ -103,7 +104,7 @@ class PythonPoissonCurl(SpectralPoissonCurlOperatorBase, HostOperator):
         return functools.partial(filter_curl_3d__1_n, *args)
 
     @classmethod
-    def build_filter_curl_3d__1_m(cls, FIN, K, FOUT, target='cpu'):
+    def build_filter_curl_3d__1_m(cls, FIN, K, FOUT, target=__DEFAULT_NUMBA_TARGET__):
         args=(FIN,K,FOUT)
         signature, _ = make_numba_signature(*args)
         layout='(n,m,p),(m)->(n,m,p)'
@@ -117,7 +118,7 @@ class PythonPoissonCurl(SpectralPoissonCurlOperatorBase, HostOperator):
         return functools.partial(filter_curl_3d__1_m, *args)
 
     @classmethod
-    def build_filter_curl_3d__1_p(cls, FIN, K, FOUT, target='cpu'):
+    def build_filter_curl_3d__1_p(cls, FIN, K, FOUT, target=__DEFAULT_NUMBA_TARGET__):
         args=(FIN,K,FOUT)
         signature, _ = make_numba_signature(*args)
         layout='(n,m,p),(p)->(n,m,p)'
diff --git a/hysop/backend/host/python/operator/solenoidal_projection.py b/hysop/backend/host/python/operator/solenoidal_projection.py
index d7aa4e451c11c05081561f3198843efa17739678..7659384c46404b987cbac96c33ccb6e9edba147d 100644
--- a/hysop/backend/host/python/operator/solenoidal_projection.py
+++ b/hysop/backend/host/python/operator/solenoidal_projection.py
@@ -2,6 +2,7 @@
 import functools
 import numba as nb
+from hysop import __DEFAULT_NUMBA_TARGET__
 from hysop.tools.types import check_instance, first_not_None
 from hysop.tools.decorators import debug
 from hysop.tools.numpywrappers import npw
@@ -17,7 +18,7 @@ class PythonSolenoidalProjection(SolenoidalProjectionOperatorBase, HostOperator)
     """
 
     @classmethod
-    def build_projection_filter(cls, FIN, FOUT, K, KK, target='cpu'):
+    def build_projection_filter(cls, FIN, FOUT, K, KK, target=__DEFAULT_NUMBA_TARGET__):
         assert len(FIN)==len(FOUT)==3
         assert len(K)==len(KK)==9
         args = FIN+K+KK+FOUT
@@ -64,7 +65,7 @@ class PythonSolenoidalProjection(SolenoidalProjectionOperatorBase, HostOperator)
 
         return functools.partial(filter_projection_3d, *args)
 
-    def build_divergence_filters(self, target='cpu'):
+    def build_divergence_filters(self, target=__DEFAULT_NUMBA_TARGET__):
         def make_div_filter(*args):
             signature, _ = make_numba_signature(*args)
             layout = '(m,n,p), (m,n,p), (m,n,p), (m),(n),(p) -> (m,n,p)'
diff --git a/hysop/core/graph/computational_operator.py b/hysop/core/graph/computational_operator.py
index 4b218dbad8338f55a548e8b437997400546bcd4c..5c3e192bfb7612fc6d6e8284b194e88b346ac01a 100644
--- a/hysop/core/graph/computational_operator.py
+++ b/hysop/core/graph/computational_operator.py
@@ -673,3 +673,8 @@ class ComputationalGraphOperator(ComputationalGraphNode):
     @classmethod
     def default_method(cls):
         return dict()
+
+    @property
+    def enable_opencl_host_buffer_mapping(self):
+        return False
+
diff --git a/hysop/fields/cartesian_discrete_field.py b/hysop/fields/cartesian_discrete_field.py
index 210db7a29c05ff5d2a5aa952a8477e1186265cc4..00b87557511dcc01d2373769deae0da96773cdb1 100644
--- a/hysop/fields/cartesian_discrete_field.py
+++ b/hysop/fields/cartesian_discrete_field.py
@@ -502,24 +502,33 @@ class CartesianDiscreteScalarFieldView(CartesianDiscreteScalarFieldViewContainer
             obj._data_view = None
         return obj
 
-    def _compute_data_view(self):
+    def _compute_data_view(self, data=None):
         """
         Compute transposed views of underlying discrete field data
         according to topology state.
+
         This is called after the discrete field has allocated data.
         Arrays are reshaped and set read-only if necessary.
+
+        This can also be called from an hysop.backend.host.host_operator.OpenClMappable object
+        to map an opencl generated pointer to host (in this case custom data is passed
+        and self_data == False).
         """
-        if (self._dfield._data is None):
-            msg='{}::{} data has not been set yet.'
+        self_data = (data is None)
+        data = first_not_None(data, self._dfield._data)
+        if (data is None):
+            if self_data:
+                msg='{}::{} internal data has not been set yet.'
+            else:
+                msg='{}::{} cannot compute data view from external None data.'
             msg=msg.format(type(self._dfield).__name__, self._dfield.name)
             raise RuntimeError(msg)
-
         if (self.memory_order is MemoryOrdering.C_CONTIGUOUS):
-            dataview = self._dfield._data.reshape(self.resolution)
+            dataview = data.reshape(self.resolution)
             assert dataview.flags.c_contiguous
         elif (self.memory_order is MemoryOrdering.F_CONTIGUOUS):
-            dataview = self._dfield._data.reshape(self.resolution[::-1])
+            dataview = data.reshape(self.resolution[::-1])
             dataview = dataview.T
             assert dataview.flags.f_contiguous
         else:
@@ -531,7 +540,11 @@
         if self.is_read_only:
             if isinstance(dataview, np.ndarray):
                 npw.set_readonly(dataview)
-        self._data_view = dataview
+
+        if self_data:
+            self._data_view = dataview
+        else:
+            return dataview
 
     def __get_data_view(self):
         if (self._data_view is None):
@@ -1346,6 +1359,11 @@
     def is_tmp(self):
         return False
 
+    def __eq__(self, other):
+        return id(self) == id(other)
+    def __hash__(self):
+        return id(self)
+
 
 class TmpCartesianDiscreteScalarField(CartesianDiscreteScalarField):
 
     @debug
diff --git a/hysop/numerics/fft/host_fft.py b/hysop/numerics/fft/host_fft.py
index a477679ed0c3fdbf606e643f00fa3812e5593289..69222d79b597a1adb88ce175201321771a896390 100644
--- a/hysop/numerics/fft/host_fft.py
+++ b/hysop/numerics/fft/host_fft.py
@@ -9,6 +9,7 @@ OpenCl backend base interface for fast Fourier Transforms.
 import psutil
 import numpy as np
 
+from hysop import __MAX_THREADS__
 from hysop.tools.types import first_not_None, check_instance
 from hysop.backend.host.host_array_backend import HostArrayBackend
 from hysop.backend.host.host_array import HostArray
@@ -66,7 +67,7 @@
         Get the default host FFT interface which is a multithreaded FFTW interface with
         ESTIMATE planning effort.
         """
-        threads = first_not_None(threads, psutil.cpu_count(logical=False))
+        threads = first_not_None(threads, __MAX_THREADS__)
         planner_effort = first_not_None(planner_effort, 'FFTW_ESTIMATE')
         from hysop.numerics.fft.fftw_fft import FftwFFT
         return FftwFFT(threads=threads,
@@ -92,14 +93,14 @@
         src = self.ensure_callable(src)
         dst = self.ensure_callable(dst)
         def exec_copy(src=src, dst=dst):
-            dst[...] += src
+            dst()[...] += src()
         return exec_copy
 
     def plan_transpose(self, op, src, dst, axes):
         src = self.ensure_callable(src)
         dst = self.ensure_callable(dst)
         def exec_transpose(src=src, dst=dst, axes=axes):
-            dst[...] = np.transpose(a=src, axes=axes)
+            dst()[...] = np.transpose(a=src(), axes=axes)
         return exec_transpose
 
     def plan_fill_zeros(self, op, a, slices):
diff --git a/hysop/operator/base/spectral_operator.py b/hysop/operator/base/spectral_operator.py
index 4727d444ce263879752fce5e6c8f19ce96bb4653..68872aa058a1b835bbe75cea024cc412e2ed3cab 100644
--- a/hysop/operator/base/spectral_operator.py
+++ b/hysop/operator/base/spectral_operator.py
@@ -19,7 +19,6 @@ from hysop.core.graph.graph import not_initialized as _not_initialized, \
                                    discretized as _discretized, \
                                    ready as _ready
 from hysop.core.graph.computational_node_frontend import ComputationalGraphNodeFrontend
-from hysop.topology.topology_descriptor import TopologyDescriptor
 from hysop.topology.cartesian_descriptor import CartesianTopologyDescriptors
 from hysop.fields.continuous_field import Field, ScalarField, TensorField
 from hysop.symbolic.array import SymbolicArray
@@ -116,7 +115,6 @@ class SpectralOperatorBase(object):
 
     @debug
     def __init__(self, fft_interface=None, fft_interface_kwds=None,
-                       enable_opencl_host_buffer_mapping=False,
                        **kwds):
         """
         Initialize a spectral operator base.
@@ -130,22 +128,8 @@ class SpectralOperatorBase(object):
 
         self.transform_groups = {}  # dict[tag] -> SpectralTransformGroup
 
-        if enable_opencl_host_buffer_mapping:
-            from hysop.backend.host.host_operator import HostOperator
-            from hysop.backend.device.opencl import cl
-            from hysop.backend.device.opencl.opencl_env import OpenClEnvironment
-            assert isinstance(self, HostOperator)
-            msg='cl_env has not been given.'
-            assert ('cl_env' in kwds), msg
-            cl_env = kwds.pop('cl_env')
-            check_instance(cl_env, OpenClEnvironment)
-            msg='enable_opencl_host_buffer_mapping is currently only compatible with CPU devices.'
-            assert (cl_env.device.type == cl.device_type.CPU), msg
-            self.cl_env = cl_env
-
         self.fft_interface = fft_interface
         self.fft_interface_kwds = fft_interface_kwds
-        self.enable_opencl_host_buffer_mapping = enable_opencl_host_buffer_mapping
 
     def new_transform_group(self, tag=None, mem_tag=None):
         """
@@ -167,29 +151,6 @@ class SpectralOperatorBase(object):
         for tg in self.transform_groups.values():
             backend = tg.initialize(**kwds)
 
-    @debug
-    def create_topology_descriptors(self):
-        if self.enable_opencl_host_buffer_mapping:
-            # enforce opencl topology on host operator
-            for (field, topo_descriptor) in self.input_fields.iteritems():
-                topo_descriptor = TopologyDescriptor.build_descriptor(
-                        backend=Backend.OPENCL,
-                        operator=self,
-                        field=field,
-                        handle=topo_descriptor,
-                        cl_env=self.cl_env)
-                self.input_fields[field] = topo_descriptor
-
-            for (field, topo_descriptor) in self.output_fields.iteritems():
-                topo_descriptor = TopologyDescriptor.build_descriptor(
-                        backend=Backend.OPENCL,
-                        operator=self,
-                        field=field,
-                        handle=topo_descriptor,
-                        cl_env=self.cl_env)
-                self.output_fields[field] = topo_descriptor
-        else:
-            super(SpectralOperatorBase, self).create_topology_descriptors()
 
     def get_field_requirements(self):
         requirements = super(SpectralOperatorBase, self).get_field_requirements()
@@ -240,9 +201,14 @@
             fft_interface.check_backend(backend,
                     enable_opencl_host_buffer_mapping=enable_opencl_host_buffer_mapping)
 
-        self.backend = backend
-        self.host_backend = backend.host_array_backend
-        self.FFTI = fft_interface
+        self.buffer_backend = backend
+        self.host_backend   = backend.host_array_backend
+        self.backend        = fft_interface.backend
+        self.FFTI            = fft_interface
+
+        for tg in self.transform_groups.values():
+            tg.discretize_wavenumbers(backend=fft_interface.backend)
+
         del self.fft_interface
         del self.fft_interface_kwds
 
@@ -250,19 +216,18 @@
         memory_requests = {}
         for tg in self.transform_groups.values():
             for (k,v) in tg.get_mem_requests(**kwds).iteritems():
-                key = (k,tg.backend)
-                if key in memory_requests:
-                    memory_requests[key] = max(memory_requests[key], v)
+                if k in memory_requests:
+                    memory_requests[k] = max(memory_requests[k], v)
                 else:
-                    memory_requests[key] = v
+                    memory_requests[k] = v
         return memory_requests
 
     def get_work_properties(self, **kwds):
         requests = super(SpectralOperatorBase, self).get_work_properties(**kwds)
-        for ((k,backend),v) in self.get_mem_requests(**kwds).iteritems():
+        for (k,v) in self.get_mem_requests(**kwds).iteritems():
             check_instance(k, str)
             check_instance(v, (int, long))
-            mrequest = MemoryRequest(backend=backend, size=v,
+            mrequest = MemoryRequest(backend=self.backend, size=v,
                                      alignment=self.min_fft_alignment)
             requests.push_mem_request(request_identifier=k, mem_request=mrequest)
         return requests
@@ -453,27 +418,28 @@
         assert len(compute_shapes)==1, 'Fields shape mismatch:'+format_error(compute_shapes)
         assert len(compute_dtypes)==1, 'Fields data type mismatch.'+format_error(compute_dtypes)
 
-        domain = self._domain
         backend         = next(iter(backends))
         grid_resolution = next(iter(grid_resolutions))
         compute_axes    = next(iter(compute_axes))
         compute_shape   = next(iter(compute_shapes))
         compute_dtype   = next(iter(compute_dtypes))
-        compute_data = (compute_dtype, compute_axes, compute_shape)
+        self.backend         = backend
+        self.grid_resolution = grid_resolution
+        self.compute_axes    = compute_axes
+        self.compute_shape   = compute_shape
+        self.compute_dtype   = compute_dtype
+
+    def discretize_wavenumbers(self, backend):
         discrete_wave_numbers = {}
         for wn in self._wave_numbers:
-            (idx, freqs, nd_freqs) = self.build_wave_number(domain, grid_resolution,
-                                                            backend, wn, *compute_data)
+            (idx, freqs, nd_freqs) = self.build_wave_number(self._domain, self.grid_resolution,
+                                                            backend, wn,
+                                                            self.compute_dtype, self.compute_axes, self.compute_shape)
             self._indexed_wave_numbers[wn].indexed_object.to_backend(backend.kind).bind_memory_object(freqs)
-            self._indexed_wave_numbers[wn].index.bind_axes(compute_axes)
+            self._indexed_wave_numbers[wn].index.bind_axes(self.compute_axes)
             discrete_wave_numbers[wn] = (idx, freqs, nd_freqs)
         self._discrete_wave_numbers = discrete_wave_numbers
-
-        self.backend = backend
-        self.compute_axes = compute_axes
-        self.compute_shape = compute_shape
-        self.compute_dtype = compute_dtype
 
     @classmethod
     def build_wave_number(cls, domain, grid_resolution,
@@ -1199,14 +1165,26 @@
             print 'zero_fill_output_slices: {}'.format(slc_format(self._zero_fill_output_slices))
 
     def get_mapped_input_buffer(self):
-        return self.input_buffer
+        return self.get_mapped_full_input_buffer()[self.input_slices]
 
     def get_mapped_output_buffer(self):
-        return self.output_buffer
+        return self.get_mapped_full_output_buffer()[self.output_slices]
 
     def get_mapped_full_input_buffer(self):
-        return self.full_input_buffer
+        dfield = self._dfield
+        if (self.is_forward
+                and dfield.backend.kind == Backend.OPENCL
+                and self.transform_group._op.enable_opencl_host_buffer_mapping):
+            return self.transform_group._op.get_mapped_object(dfield)[dfield.compute_slices]
+        else:
+            return self.full_input_buffer
 
     def get_mapped_full_output_buffer(self):
-        return self.full_output_buffer
-
+        dfield = self._dfield
+        if (self.is_backward
+                and dfield.backend.kind == Backend.OPENCL
+                and self.transform_group._op.enable_opencl_host_buffer_mapping):
+            return self.transform_group._op.get_mapped_object(dfield)[dfield.compute_slices]
+        else:
+            return self.full_output_buffer
+
     def determine_buffer_shape(cls, transform_shape, target_is_buffer, offsets, axes):
         offsets = tuple(offsets[ai] for ai in axes)
         slices = []
@@ -1467,8 +1445,9 @@
         nbytes = max(nbytes, compute_nbytes(self.input_shape, self.input_dtype))
         nbytes = max(nbytes, compute_nbytes(self.output_shape, self.output_dtype))
 
+        backend = self.transform_group._op.FFTI.backend
         mem_tag = self.transform_group.mem_tag
-        kind = self.backend.kind
+        kind = backend.kind
         B0_tag = '{}_{}_B0'.format(mem_tag, kind)
         B1_tag = '{}_{}_B1'.format(mem_tag, kind)
@@ -1560,12 +1539,6 @@ SPECTRAL TRANSFORM SETUP
             print msg
 
         fft_plans = ()
-
-        # enqueue map for input and output buffers
-        if (self.backend.kind == Backend.HOST):
-            mapped_
-            FFTI.plan_map_buffers()
-
         for i in xrange(ntransforms):
             transpose = transpose_info[i]
             transform = transform_info[i]
diff --git a/hysop/tools/numba_utils.py b/hysop/tools/numba_utils.py
index 685d7fc616e4bfe10ee249f3eab764ad77a76689..ff2e524d27e15424f787aa09d0d45bc6c42e7d83 100644
--- a/hysop/tools/numba_utils.py
+++ b/hysop/tools/numba_utils.py
@@ -45,7 +45,17 @@ def make_numba_signature(*args):
     numba_args = ()
     numba_layout = ()
     for a in args:
-        if isinstance(a, (np.ndarray,Array)):
+        if isinstance(a, Array):
+            assert a.dtype.type in dtype_to_ntype, a.dtype.type
+            dtype = dtype_to_ntype[a.dtype.type]
+            if a.is_c_contiguous:
+                na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout='C')
+            elif a.is_fortran_contiguous:
+                na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout='F')
+            else:
+                na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout='A')
+            numba_layout += (format_shape(*a.shape),)
+        elif isinstance(a, np.ndarray):
             assert a.dtype.type in dtype_to_ntype, a.dtype.type
             dtype = dtype_to_ntype[a.dtype.type]
             if a.flags['C_CONTIGUOUS']:
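Configuration sketch (illustration only, not part of the patch): how the new threading switches are expected to drive the defaults used throughout this patch. The definition of __DEFAULT_NUMBA_TARGET__ is not shown in this diff, so the 'parallel'/'cpu' mapping, the HYSOP_ environment-variable prefix and the get_env() stand-in below are assumptions.

    import os
    import psutil

    # Stand-in for hysop's get_env() helper (assumed to read HYSOP_-prefixed variables).
    __ENABLE_THREADING__ = (os.environ.get('HYSOP_ENABLE_THREADING', '1') != '0')

    if __ENABLE_THREADING__:
        # Default to one thread per physical core, as in HostFFTI.default_interface().
        __MAX_THREADS__ = int(os.environ.get('HYSOP_MAX_THREADS',
                                             psutil.cpu_count(logical=False)))
    else:
        __MAX_THREADS__ = 1

    # Assumed mapping: threaded builds use numba's 'parallel' target, otherwise 'cpu'.
    __DEFAULT_NUMBA_TARGET__ = ('parallel' if __ENABLE_THREADING__ else 'cpu')

    # The guvectorize kernels patched above (curl, diffusion, poisson, poisson_curl,
    # solenoidal_projection) default to target=__DEFAULT_NUMBA_TARGET__, and
    # HostFFTI.default_interface() now caps FFTW threads with __MAX_THREADS__ instead
    # of querying psutil directly.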