diff --git a/hysop/backend/device/opencl/opencl_fft.py b/hysop/backend/device/opencl/opencl_fft.py index e67bb20594e82347b4d2816f06322c5e906884e9..b5d3c9b2d3df9e5dc3669b769081f2c9c194c400 100644 --- a/hysop/backend/device/opencl/opencl_fft.py +++ b/hysop/backend/device/opencl/opencl_fft.py @@ -36,7 +36,7 @@ class OpenClFFT(FFT): t_inplace = False t_strides_out, t_distance_out, t_batchsize_out, t_shape_out, \ axes_transform_out = self.calculate_transform_strides(axes, out_array) - if (in_array.base_data is out_array.base_data): + if (in_array.base_data == out_array.base_data): # enforce no overlap (unless inplace) if (in_array.offset < out_array.offset): assert (in_array.offset + in_array.nbytes) < out_array.offset @@ -93,9 +93,11 @@ class OpenClFFT(FFT): plan.batch_size = self.batchsize plan.precision = precision plan.layouts = (layout_in, layout_out) - - (in_data, out_data) = self.set_offset_callbacks(plan, in_array, out_array, layout_in, layout_out, - keep_buffer_offset) + + assert not keep_buffer_offset + (in_data, out_data) = self.set_offset_callbacks(plan, in_array, out_array, + layout_in, layout_out, + keep_buffer_offset) self.plan = plan self.in_array = in_array @@ -117,7 +119,6 @@ class OpenClFFT(FFT): except clArray.ArrayHasOffsetError: in_data = in_array.base_data pre, input_buffer_offset = self.pre_offset_callback(in_array, layout_in) - print 'INPUT.DATA FAILED, OFFSET IS {}'.format(input_buffer_offset) plan.set_callback('pre_callback', pre, 'pre', user_data=None) try: @@ -128,7 +129,6 @@ class OpenClFFT(FFT): except clArray.ArrayHasOffsetError: out_data = out_array.base_data post, output_buffer_offset = self.post_offset_callback(out_array, layout_out) - print 'OUTPUT.DATA FAILED, OFFSET IS {}'.format(output_buffer_offset) plan.set_callback('post_callback', post, 'post', user_data=None) self.input_buffer_offset = input_buffer_offset diff --git a/hysop/backend/device/opencl/operator/poisson_rotational.py b/hysop/backend/device/opencl/operator/poisson_rotational.py index 4aa9c99c17d359c28b42e0ea498011bcaacfbf5d..d4c5906696678e6ebe091d91933c1bcd3fbc500e 100644 --- a/hysop/backend/device/opencl/operator/poisson_rotational.py +++ b/hysop/backend/device/opencl/operator/poisson_rotational.py @@ -73,11 +73,6 @@ class OpenClPoissonRotational(PoissonRotationalOperatorBase, OpenClSymbolic): else: msg='dim={}'.format(dim) raise NotImplementedError(msg) - print - print - for e in exprs: - print e - print self.require_symbolic_kernel('recover_velocity', *exprs) super(OpenClPoissonRotational, self).initialize(**kwds) @@ -154,19 +149,16 @@ class OpenClPoissonRotational(PoissonRotationalOperatorBase, OpenClSymbolic): shape[axes[0]] = shape[axes[0]] // 2 + 1 shape = tuple(shape) assert npw.array_equal(shape, self.Ksizes[::-1]) - + request = MemoryRequest.empty_like(a=dU, shape=shape, dtype=ctype) requests.push_mem_request('R2C_C2R', request) return requests @debug def setup(self, work): - print self.backend fft_buffers = work.get_buffer(self, 'R2C_C2R') - print tuple((type(d),d.shape,d.dtype,d.offset) for d in fft_buffers) - #_fft_buffers = tuple(self.backend.empty_like(b) for b in fft_buffers) - #fft_buffers = _fft_buffers - print tuple((type(d),d.shape,d.dtype,d.offset) for d in fft_buffers) + # _fft_buffers = tuple(self.backend.empty(shape=b.shape, dtype=b.dtype) for b in fft_buffers) + # fft_buffers = _fft_buffers for i in xrange(self.dim): self.kernel_buffers[2][i].bind_memory_object(fft_buffers[i]) self.kernel_buffers[3][i].bind_memory_object(fft_buffers[i]) @@ -191,9 +183,6 @@ class OpenClPoissonRotational(PoissonRotationalOperatorBase, OpenClSymbolic): forward_W_plans, backward_U_plans, backward_W_plans = [],[],[] for (i,Wi) in enumerate(self.W_buffers): - print 'FORWARD W{}'.format(i) - print 'INPUT:', 'Wi', Wi.base_data, Wi.offset - print 'OUTPUT:', 'fft_buffer', fft_buffers[i].base_data, fft_buffers[i].offset fp = OpenClFFT(context=context, queue=queue, in_array=Wi, out_array=fft_buffers[i].handle, axes=axes, fast_math=False, @@ -209,9 +198,6 @@ class OpenClPoissonRotational(PoissonRotationalOperatorBase, OpenClSymbolic): backward_W_plans.append(bp) for (i,Ui) in enumerate(self.U_buffers): - print 'BACKWARD U{}'.format(i) - print 'INPUT:', 'fft_buffer', fft_buffers[i].base_data, fft_buffers[i].offset - print 'OUTPUT:', 'Ui', Ui.base_data, Ui.offset bp = OpenClFFT(context=context, queue=queue, in_array=fft_buffers[i].handle, out_array=Ui, axes=axes, fast_math=False, @@ -241,10 +227,9 @@ class OpenClPoissonRotational(PoissonRotationalOperatorBase, OpenClSymbolic): # so we reverse the order of forward transforms. # it = simulation.current_iteration # t = simulation.t() - for fp in self.forward_W_plans: + for fp in self.forward_W_plans[::-1]: evt, = fp.enqueue(queue=self.queue) - evt.wait() - + debug_dumper(1, 0.5, 'Wh', tuple(b.get().handle for b in self.fft_buffers)) # debug_dumper(it, t, 'U', self.dU) @@ -258,12 +243,9 @@ class OpenClPoissonRotational(PoissonRotationalOperatorBase, OpenClSymbolic): # evt = self._exchange_W_ghosts(queue=self.queue) # recover velocity evt = self.compute_velocity_kernel(queue=self.queue) - evt.wait() for bp in self.backward_U_plans: evt, = bp.enqueue() - evt.wait() # debug_dumper(it, t, 'U', self.dU) if (self._exchange_U_ghosts is not None): evt = self._exchange_U_ghosts(queue=self.queue) - evt.wait() # debug_dumper(it, t, 'U', self.dU) diff --git a/hysop/operator/tests/test_poisson_rotational.py b/hysop/operator/tests/test_poisson_rotational.py index 6ce0d11730db685e3fecf49de0dd2067f77bbb50..6223d9b5613f5d8b1b02945563010fb49696a9b3 100644 --- a/hysop/operator/tests/test_poisson_rotational.py +++ b/hysop/operator/tests/test_poisson_rotational.py @@ -189,16 +189,13 @@ class TestPoissonRotationalOperator(object): fns=self.analytic_functions[dim]['Us']) Wref = tuple( data.get().handle.copy() for data in dw.data ) Uref = tuple( data.get().handle.copy() for data in du.data ) - #dbg(0, 0.0, 'U', Uref) - #dbg(0, 0.0, 'W', Wref) du.initialize(self.__random_init, dtype=dtype) op.apply(simulation=None, debug_dumper=dbg) Wout = tuple( data.get().handle.copy() for data in dw.data ) Uout = tuple( data.get().handle.copy() for data in du.data ) - #dbg(1, 1.0, 'W', Wout) - #dbg(1, 1.0, 'U', Uout) + # dbg(1, 1.0, 'U', Uout) self._check_output(impl, op, Wref, Uref, Wout, Uout) print