diff --git a/hysop/backend/device/opencl/operator/poisson_curl.py b/hysop/backend/device/opencl/operator/poisson_curl.py index d07cf55428dd2f21df123231a264a012651ae07e..aa57478c768a7490260f0deec8f88c510aef9e4e 100644 --- a/hysop/backend/device/opencl/operator/poisson_curl.py +++ b/hysop/backend/device/opencl/operator/poisson_curl.py @@ -4,6 +4,7 @@ from hysop.tools.numpywrappers import npw from hysop.tools.decorators import debug from hysop.tools.warning import HysopWarning from hysop.tools.units import bytes2str +from hysop.tools.numerics import is_complex, find_common_dtype from hysop.operator.base.poisson_curl import SpectralPoissonCurlOperatorBase from hysop.backend.device.opencl.opencl_symbolic import OpenClSymbolic from hysop.core.graph.graph import op_apply @@ -54,10 +55,35 @@ class OpenClPoissonCurl(SpectralPoissonCurlOperatorBase, OpenClSymbolic): expr = Assignment(wout, win / (1 - nu*dt*F)) self.require_symbolic_kernel('diffusion_kernel__{}'.format(i), expr) Win = Wout - - exprs = () + indices = local_indices_symbols[:dim] cond = LogicalAND(*tuple(LogicalEQ(idx,0) for idx in indices)) + + if self.should_project: + exprs = () + dtype = find_common_dtype(*tuple(Ft.output_dtype for Ft in self.W_forward_transforms)) + Cs = self.symbolic_tmp_scalars('C', dtype=dtype, count=3) + for i in xrange(3): + expr = 0 + for j in xrange(3): + e = Win[j] + if (i==j): + e = KK[j][j]*e + else: + e = (ComplexMul(K[j][j], e) if K[j][j].Wn.is_complex else K[j][j]*e) + e = (ComplexMul(K[j][i], e) if K[j][i].Wn.is_complex else K[j][i]*e) + expr += e + expr /= sum(KK[i]) + expr = Select(expr, 0, cond) + expr = Assignment(Cs[i], expr) + exprs += (expr,) + for i in xrange(3): + expr = Assignment(Wout[i], Win[i]-Cs[i]) + exprs += (expr,) + self.require_symbolic_kernel('projection_kernel', *exprs) + Win = Wout + + exprs = () for i in xrange(wcomp): F = sum(KK[i]) expr = Assignment(Win[i], Select(Win[i]/F,0,cond)) @@ -121,8 +147,10 @@ class OpenClPoissonCurl(SpectralPoissonCurlOperatorBase, OpenClSymbolic): def _build_projection_kernel(self): if self.should_project: - raise NotImplementedError - self.filter_projection, _ = self.symbolic_kernels['projection_kernel'] + knl, _ = self.symbolic_kernels['projection_kernel'] + self.filter_projection = functools.partial(knl, queue=self.cl_env.default_queue) + else: + self.filter_projection = None def _build_ghost_exchangers(self): def noop(): diff --git a/hysop/backend/host/python/operator/poisson_curl.py b/hysop/backend/host/python/operator/poisson_curl.py index 5e065e382b3d6184f0c5f76d8b4084377bfb4b10..132a4a2d3222e27f0f760bdb4dca5ed010d9752a 100644 --- a/hysop/backend/host/python/operator/poisson_curl.py +++ b/hysop/backend/host/python/operator/poisson_curl.py @@ -154,7 +154,9 @@ class PythonPoissonCurl(SpectralPoissonCurlOperatorBase, OpenClMappable, HostOpe # projection filter if self.should_project: self.projection_filter = PythonSolenoidalProjection.build_projection_filter( - WIN,WIN,K,KK) + WIN,WIN, + sum(K, ()), + sum(KK, ())) else: self.projection_filter = None @@ -204,7 +206,7 @@ class PythonPoissonCurl(SpectralPoissonCurlOperatorBase, OpenClMappable, HostOpe for (Wi, filter_diffusion) in zip(self.WIN, self.diffusion_filters): filter_diffusion(nu_dt, Wi) if project: - self.filter_projection() + self.projection_filter() if (diffuse or project): for Bt in self.W_backward_transforms: Bt() diff --git a/hysop/numerics/fft/fftw_fft.py b/hysop/numerics/fft/fftw_fft.py index 4185e220cb8852f4e8f6bbcd69ae3187ca5677d8..6af22b3144fae997a757e872a751ed4e8c754302 100644 --- a/hysop/numerics/fft/fftw_fft.py +++ b/hysop/numerics/fft/fftw_fft.py @@ -8,7 +8,7 @@ import warnings import pyfftw import numpy as np -from hysop import __FFTW_NUM_THREADS__, __FFTW_PLANNER_EFFORT__, __FFTW_PLANNER_TIMELIMIT__ +from hysop import __FFTW_NUM_THREADS__, __FFTW_PLANNER_EFFORT__, __FFTW_PLANNER_TIMELIMIT__, __VERBOSE__ from hysop.tools.io_utils import IO from hysop.tools.types import first_not_None from hysop.tools.misc import prod @@ -48,7 +48,8 @@ class FftwFFTPlan(HostFFTPlanI): update_cache(cls.cache_file(), h, wisdom) def __init__(self, a, out, scaling=None, **plan_kwds): - super(FftwFFTPlan, self).__init__() + verbose = plan_kwds.pop('verbose', __VERBOSE__) + super(FftwFFTPlan, self).__init__(verbose=verbose) if isinstance(a, HostArray): plan_kwds['input_array'] = a.handle @@ -183,15 +184,17 @@ class FftwFFT(HostFFTI): planner_effort=None, planning_timelimit=None, destroy_input=False, - warn_on_allocation=True, warn_on_misalignment=True, + warn_on_allocation=True, + error_on_allocation=False, backend=None, allocator=None, **kwds): threads = first_not_None(threads, __FFTW_NUM_THREADS__) planner_effort = first_not_None(planner_effort, __FFTW_PLANNER_EFFORT__) planning_timelimit = first_not_None(planning_timelimit, __FFTW_PLANNER_TIMELIMIT__) super(FftwFFT, self).__init__(backend=backend, allocator=allocator, - warn_on_allocation=warn_on_allocation, **kwds) + warn_on_allocation=warn_on_allocation, error_on_allocation=error_on_allocation, + **kwds) self.supported_ftypes = (np.float32, np.float64, np.longdouble) self.supported_ctypes = (np.complex64, np.complex128, np.clongdouble) self.supported_cosine_transforms = (1,2,3,4) @@ -223,6 +226,7 @@ class FftwFFT(HostFFTI): plan_kwds['direction'] = kwds.pop('direction') plan_kwds['axes'] = kwds.pop('axes', (kwds.pop('axis'),)) plan_kwds['threads'] = kwds.pop('threads', self.threads) + plan_kwds['verbose'] = kwds.pop('verbose', __VERBOSE__) plan_kwds['planning_timelimit'] = kwds.pop('planning_timelimit', self.planning_timelimit) flags = () diff --git a/hysop/operator/base/spectral_operator.py b/hysop/operator/base/spectral_operator.py index abc4babfa7b51336b59ec51c83a2ff4f218f4f28..205fbee2bbb03fdcc39e75537252b1e64f97f47e 100644 --- a/hysop/operator/base/spectral_operator.py +++ b/hysop/operator/base/spectral_operator.py @@ -1472,7 +1472,7 @@ class PlannedSpectralTransform(object): dst_nbytes = compute_nbytes(dst_shape, dst_dtype) b0 = src[:src_nbytes].view(dtype=src_dtype).reshape(src_shape) b1 = dst[:dst_nbytes].view(dtype=dst_dtype).reshape(dst_shape) - fft_plan = tg.FFTI.get_transform(tr)(a=b0, out=b1, + fft_plan = tg.FFTI.get_transform(tr)(a=b0.handle, out=b1.handle, axis=self.field.dim-1, verbose=False) fft_plan.setup(queue=queue)