diff --git a/hysop/backend/device/opencl/opencl_fft.py b/hysop/backend/device/opencl/opencl_fft.py index cc4e2330a89bc24917997e325e3639f115af19d4..07d3b680b3870ba47f0e250806d6424b90b287b0 100644 --- a/hysop/backend/device/opencl/opencl_fft.py +++ b/hysop/backend/device/opencl/opencl_fft.py @@ -1,7 +1,14 @@ - import warnings import numpy as np -from gpyfft.fft import FFT, gfft, GFFT +try: + from gpyfft.fft import FFT, gfft, GFFT +except ImportError as e: + class FFT(): + def __init__(self): + assert False, "Du to gpyfft import error ({}), this class is useless".format(e) + gfft, GFFT = None, None + print e + print "Some functionnalities may not work. It seems that hysop is called from non OpenCL machine." from hysop import vprint from hysop.tools.types import first_not_None from hysop.tools.warning import HysopWarning @@ -11,9 +18,10 @@ from hysop.tools.numpywrappers import npw from hysop.backend.device.opencl import cl, clArray from hysop.backend.device.codegen.base.variables import dtype_to_ctype + class OpenClFFT(FFT): - def __init__(self, context, queue, in_array, + def __init__(self, context, queue, in_array, out_array=None, axes=None, fast_math=False, real=False, keep_buffer_offset=False): @@ -28,9 +36,9 @@ class OpenClFFT(FFT): axes = np.argsort(in_array.strides) else: axes = np.asarray(axes) - + t_strides_in, t_distance_in, t_batchsize_in, t_shape, axes_transform = \ - self.calculate_transform_strides(axes, in_array) + self.calculate_transform_strides(axes, in_array) if (out_array is not None): t_inplace = False @@ -44,7 +52,7 @@ class OpenClFFT(FFT): assert (out_array.offset + out_array.nbytes) < in_array.offset else: t_inplace = True - msg='error finding transform axis (consider setting axes argument)' + msg = 'error finding transform axis (consider setting axes argument)' assert np.all(axes_transform == axes_transform_out), msg else: out_array = in_array @@ -57,29 +65,29 @@ class OpenClFFT(FFT): precision = gfft.CLFFT_DOUBLE if in_array.dtype in (np.float32, np.float64): - layout_in = gfft.CLFFT_REAL + layout_in = gfft.CLFFT_REAL layout_out = gfft.CLFFT_HERMITIAN_INTERLEAVED expected_out_shape = list(in_array.shape) expected_out_shape[axes_transform[0]] = \ - expected_out_shape[axes_transform[0]]//2 + 1 - msg='output array shape {} does not match expected shape: {}' - msg=msg.format(out_array.shape, expected_out_shape) + expected_out_shape[axes_transform[0]]//2 + 1 + msg = 'output array shape {} does not match expected shape: {}' + msg = msg.format(out_array.shape, expected_out_shape) assert out_array.shape == tuple(expected_out_shape), msg elif in_array.dtype in (np.complex64, np.complex128): if not real: - layout_in = gfft.CLFFT_COMPLEX_INTERLEAVED + layout_in = gfft.CLFFT_COMPLEX_INTERLEAVED layout_out = gfft.CLFFT_COMPLEX_INTERLEAVED else: - layout_in = gfft.CLFFT_HERMITIAN_INTERLEAVED + layout_in = gfft.CLFFT_HERMITIAN_INTERLEAVED layout_out = gfft.CLFFT_REAL t_shape = t_shape_out if t_inplace and ((layout_in is gfft.CLFFT_REAL) or (layout_out is gfft.CLFFT_REAL)): - assert ((in_array.strides[axes_transform[0]] == in_array.dtype.itemsize) and \ + assert ((in_array.strides[axes_transform[0]] == in_array.dtype.itemsize) and (out_array.strides[axes_transform[0]] == out_array.dtype.itemsize)), \ - 'inline real transforms need stride 1 for first transform axis' + 'inline real transforms need stride 1 for first transform axis' self.t_shape = t_shape self.batchsize = t_batchsize_in @@ -93,24 +101,24 @@ class OpenClFFT(FFT): plan.batch_size = self.batchsize plan.precision = precision plan.layouts = (layout_in, layout_out) - + assert not keep_buffer_offset - (in_data, out_data) = self.set_offset_callbacks(plan, in_array, out_array, - layout_in, layout_out, - keep_buffer_offset) - + (in_data, out_data) = self.set_offset_callbacks(plan, in_array, out_array, + layout_in, layout_out, + keep_buffer_offset) + self.plan = plan - self.in_array = in_array + self.in_array = in_array self.out_array = out_array self.in_data = in_data self.out_data = out_data self.temp_buffer = None - self._baked = False - self._allocated = False - + self._baked = False + self._allocated = False + def set_offset_callbacks(self, plan, in_array, out_array, layout_in, layout_out, - keep_buffer_offset): + keep_buffer_offset): try: if keep_buffer_offset: raise clArray.ArrayHasOffsetError @@ -131,38 +139,38 @@ class OpenClFFT(FFT): post, output_buffer_offset = self.post_offset_callback(out_array, layout_out) plan.set_callback('post_callback', post, 'post', user_data=None) - self.input_buffer_offset = input_buffer_offset + self.input_buffer_offset = input_buffer_offset self.output_buffer_offset = output_buffer_offset return (in_data, out_data) - + def bake(self): if self._baked: - msg='Plan was already baked.' + msg = 'Plan was already baked.' raise RuntimeError(msg) msg = 'Baking {}[precision={}, shape={}, inplace={}, layout_in={}, layout_out={}]'.format( - self.__class__.__name__, - self.precision, self.t_shape, self.t_inplace, - self.layout_in, self.layout_out) + self.__class__.__name__, + self.precision, self.t_shape, self.t_inplace, + self.layout_in, self.layout_out) self.plan.bake(self.queue) self._baked = True return self def allocate(self, buf=None): if self._allocated: - msg='Plan was already allocated.' + msg = 'Plan was already allocated.' raise RuntimeError(msg) size = self.plan.temp_array_size - if (size>0): + if (size > 0): if (buf is None): - msg='Allocating temporary buffer of size {} for clFFT::{}.' - msg=msg.format(bytes2str(size), id(self)) + msg = 'Allocating temporary buffer of size {} for clFFT::{}.' + msg = msg.format(bytes2str(size), id(self)) warnings.warn(msg, HysopWarning) buf = cl.Buffer(self.context, cl.mem_flags.READ_WRITE, size=size) self.temp_buffer = buf elif (buf.size != size): - msg='Buffer does not match required size: {} != {}' - msg=msg.format(buf.size, size) + msg = 'Buffer does not match required size: {} != {}' + msg = msg.format(buf.size, size) raise ValueError(msg) else: self.temp_buffer = buf.data @@ -174,9 +182,9 @@ class OpenClFFT(FFT): def enqueue(self, queue=None, wait_for_events=None, direction_forward=True): """ Enqueue transform with array base_data. - /!\ Do not forget to offset input and output by array.offset - within custom user callbacks, only base_data is passed - to ensure OpenCL pointers alignment of kernel arguments. + /!\ Do not forget to offset input and output by array.offset + within custom user callbacks, only base_data is passed + to ensure OpenCL pointers alignment of kernel arguments. See self.set_offset_callbacks(). """ self._assert_ready() @@ -184,21 +192,21 @@ class OpenClFFT(FFT): queue = first_not_None(queue, self.queue) if self.t_inplace: - events = self.plan.enqueue_transform((queue,), - (in_data,), - direction_forward=direction_forward, - temp_buffer=self.temp_buffer, - wait_for_events=wait_for_events) + events = self.plan.enqueue_transform((queue,), + (in_data,), + direction_forward=direction_forward, + temp_buffer=self.temp_buffer, + wait_for_events=wait_for_events) else: - events = self.plan.enqueue_transform((queue,), - (in_data,), (out_data), - direction_forward=direction_forward, - temp_buffer=self.temp_buffer, - wait_for_events=wait_for_events) + events = self.plan.enqueue_transform((queue,), + (in_data,), (out_data), + direction_forward=direction_forward, + temp_buffer=self.temp_buffer, + wait_for_events=wait_for_events) return events - + def enqueue_arrays(self, *args, **kwds): - msg='Enqueue arrays is not supported.' + msg = 'Enqueue arrays is not supported.' raise NotImplementedError(msg) @property @@ -212,94 +220,94 @@ class OpenClFFT(FFT): def _assert_ready(self): if __debug__ and not self.ready: - msg='Plan is not ready:' - msg+='\n *baked: {}' - msg+='\n *allocated: {}' - msg+='\n' - msg=msg.format(self._baked, self._allocated) + msg = 'Plan is not ready:' + msg += '\n *baked: {}' + msg += '\n *allocated: {}' + msg += '\n' + msg = msg.format(self._baked, self._allocated) raise RuntimeError(msg) - + @classmethod def check_dtype(cls, dtype, layout): if layout in (gfft.CLFFT_HERMITIAN_INTERLEAVED, gfft.CLFFT_COMPLEX_INTERLEAVED): if not is_complex(dtype): - msg='Layout is {} but got array with dtype {}.' - msg=msg.format(layout, dtype) + msg = 'Layout is {} but got array with dtype {}.' + msg = msg.format(layout, dtype) raise RuntimeError(msg) elif layout in (gfft.CLFFT_REAL,): if not is_fp(dtype): - msg='Layout is CLFFT_REAL but got array with dtype {}.' - msg=msg.format(dtype) + msg = 'Layout is CLFFT_REAL but got array with dtype {}.' + msg = msg.format(dtype) raise RuntimeError(msg) else: - msg='Unsupported data layout {}.' - msg=msg.format(layout) + msg = 'Unsupported data layout {}.' + msg = msg.format(layout) raise NotImplementedError(msg) - + def pre_offset_callback(self, in_array, layout_in): dtype = in_array.dtype fp = dtype_to_ctype(dtype) self.check_dtype(dtype, layout_in) if (in_array.offset % dtype.itemsize) != 0: - msg='Unaligned array offset.' + msg = 'Unaligned array offset.' raise RuntimeError(msg) base_offset = (in_array.offset // dtype.itemsize) callback = \ - ''' + ''' {fp} pre_callback(__global void* input, const uint offset, __global void* userdata) {{ - __global {fp}* in = (__global {fp}*) input; + __global {fp}* in = (__global {fp}*) input; return in[{base_offset}uL+offset]; }} '''.format(fp=fp, base_offset=base_offset) - + input_buffer_offset = '{}uL'.format(base_offset) - + return callback, input_buffer_offset - + def post_offset_callback(self, out_array, layout_out): dtype = out_array.dtype self.check_dtype(dtype, layout_out) fp = dtype_to_ctype(dtype) if (out_array.offset % dtype.itemsize) != 0: - msg='Unaligned array offset.' + msg = 'Unaligned array offset.' raise RuntimeError(msg) base_offset = (out_array.offset // dtype.itemsize) - + callback = \ - ''' + ''' void post_callback(__global void* output, const uint offset, __global void* userdata, const {fp} fftoutput) {{ - __global {fp}* out = (__global {fp}*) output; + __global {fp}* out = (__global {fp}*) output; out[{base_offset}uL+offset] = fftoutput; }} '''.format(fp=fp, base_offset=base_offset) output_buffer_offset = '{}uL'.format(base_offset) - + return callback, output_buffer_offset - + @classmethod def allocate_plans(cls, operator, plans): tmp_size = max(plan.required_buffer_size for plan in plans) - msg='Allocating an additional {} temporary buffer for clFFT in operator {}.'.format( - bytes2str(tmp_size), operator.name) + msg = 'Allocating an additional {} temporary buffer for clFFT in operator {}.'.format( + bytes2str(tmp_size), operator.name) - if (tmp_size>0): + if (tmp_size > 0): vprint(msg) tmp_buffer = operator.backend.empty(shape=(tmp_size), dtype=npw.uint8) for plan in plans: if (plan.required_buffer_size > tmp_buffer.nbytes): - msg='\nFATAL ERROR: Failed to allocate temporary buffer for clFFT.' - msg+='\n => clFFT expected {} bytes but only {} bytes have been allocated.\n' - msg=msg.format(plan.required_buffer_size, tmp_buffer.nbytes) + msg = '\nFATAL ERROR: Failed to allocate temporary buffer for clFFT.' + msg += '\n => clFFT expected {} bytes but only {} bytes have been allocated.\n' + msg = msg.format(plan.required_buffer_size, tmp_buffer.nbytes) raise RuntimeError(msg) - elif (plan.required_buffer_size>0): + elif (plan.required_buffer_size > 0): buf = tmp_buffer[:plan.required_buffer_size] plan.allocate(buf=buf) else: @@ -310,4 +318,3 @@ class OpenClFFT(FFT): plan.allocate() tmp_buffer = None return tmp_buffer -