diff --git a/hysop/backend/device/opencl/operator/poisson_rotational.py b/hysop/backend/device/opencl/operator/poisson_rotational.py index d4c5906696678e6ebe091d91933c1bcd3fbc500e..f64b482f7e9ba1a72cf893dbfc2a33131d8a3e11 100644 --- a/hysop/backend/device/opencl/operator/poisson_rotational.py +++ b/hysop/backend/device/opencl/operator/poisson_rotational.py @@ -157,8 +157,6 @@ class OpenClPoissonRotational(PoissonRotationalOperatorBase, OpenClSymbolic): @debug def setup(self, work): fft_buffers = work.get_buffer(self, 'R2C_C2R') - # _fft_buffers = tuple(self.backend.empty(shape=b.shape, dtype=b.dtype) for b in fft_buffers) - # fft_buffers = _fft_buffers for i in xrange(self.dim): self.kernel_buffers[2][i].bind_memory_object(fft_buffers[i]) self.kernel_buffers[3][i].bind_memory_object(fft_buffers[i]) @@ -225,15 +223,15 @@ class OpenClPoissonRotational(PoissonRotationalOperatorBase, OpenClSymbolic): '''Solve the PoissonRotational equation.''' # /!\ clFFT use the destination buffer as a scratch # so we reverse the order of forward transforms. - # it = simulation.current_iteration - # t = simulation.t() - for fp in self.forward_W_plans[::-1]: - evt, = fp.enqueue(queue=self.queue) - debug_dumper(1, 0.5, 'Wh', tuple(b.get().handle for b in self.fft_buffers)) - - # debug_dumper(it, t, 'U', self.dU) + it = simulation.current_iteration + t = simulation.t() + debug_dumper(it, t, 'U', self.dU) # debug_dumper(it, t, 'W', self.dW) + + for fp in self.forward_W_plans: + evt, = fp.enqueue(queue=self.queue) + # project and recover vorticity if required # if self._do_project(simulation): # evt = self.projection_kernel(queue=self.queue) @@ -245,7 +243,10 @@ class OpenClPoissonRotational(PoissonRotationalOperatorBase, OpenClSymbolic): evt = self.compute_velocity_kernel(queue=self.queue) for bp in self.backward_U_plans: evt, = bp.enqueue() - # debug_dumper(it, t, 'U', self.dU) + + debug_dumper(it, t, 'U', self.dU) + if (self._exchange_U_ghosts is not None): evt = self._exchange_U_ghosts(queue=self.queue) - # debug_dumper(it, t, 'U', self.dU) + + debug_dumper(it, t, 'U', self.dU) diff --git a/hysop/backend/host/python/operator/poisson_rotational.py b/hysop/backend/host/python/operator/poisson_rotational.py index 6f6585a7698213360c90a7c7d9c4d58fe9aa0fbb..0ae67d699f3ed2bc4c9bfda034d367f854ad777e 100644 --- a/hysop/backend/host/python/operator/poisson_rotational.py +++ b/hysop/backend/host/python/operator/poisson_rotational.py @@ -78,8 +78,9 @@ class PythonPoissonRotational(PoissonRotationalOperatorBase, HostOperator): W_buffers, U_buffers = self.W_buffers, self.U_buffers iK2, (Kz,Ky,Kx) = self.iK2, self.K - # it = simulation.current_iteration - # t = simulation.t() + it = simulation.current_iteration + t = simulation.t() + debug_dumper(it, t, 'U', self.dU) W0,W1,W2 = W_buffers U0,U1,U2 = U_buffers @@ -88,7 +89,7 @@ class PythonPoissonRotational(PoissonRotationalOperatorBase, HostOperator): W1_hat = npw.fft.rfftn(W1).astype(self.ctype) W2_hat = npw.fft.rfftn(W2).astype(self.ctype) - debug_dumper(1, 0.5, 'Wh', (W0_hat, W1_hat, W2_hat)) + # debug_dumper(1, 0.5, 'Wh', (W0_hat, W1_hat, W2_hat)) # if project: # divW = iK2*(K[0]*W0_hat + K[1]*W1_hat + K[2]*W2_hat) @@ -121,7 +122,7 @@ class PythonPoissonRotational(PoissonRotationalOperatorBase, HostOperator): Ui_hat[...] = (-Kx*W1_hat + Ky*W0_hat) U2[...] = npw.fft.irfftn(Ui_hat, U2.shape) - # debug_dumper(it, t, 'U', self.dU) + debug_dumper(it, t, 'U', self.dU) self.dU.exchange_ghosts() - # debug_dumper(it, t, 'U', self.dU) + debug_dumper(it, t, 'U', self.dU) diff --git a/hysop/core/graph/computational_graph.py b/hysop/core/graph/computational_graph.py index 117bbdaceace19739c0e2157c35f6cbc4a115b6d..596c52735311348ad21501ad2a3d8248a4d43584 100644 --- a/hysop/core/graph/computational_graph.py +++ b/hysop/core/graph/computational_graph.py @@ -600,7 +600,7 @@ class ComputationalGraph(ComputationalGraphNode): op.setup(work=work) self.ready = True - def build(self, outputs_are_inputs=True, method=None): + def build(self, outputs_are_inputs=True, method=None, allow_subbuffers=False): """ Shortcut for initialize(), discretize(), get_work_properties(), setup() for quick graph initialization. @@ -610,7 +610,7 @@ class ComputationalGraph(ComputationalGraphNode): assert self.is_root, 'Only root graph can be built.' self.discretize() work = self.get_work_properties() - work.allocate(allow_subbuffers=True) + work.allocate(allow_subbuffers=allow_subbuffers) self.setup(work) return self diff --git a/hysop/core/memory/memory_request.py b/hysop/core/memory/memory_request.py index d8b1e47e3021105ddf2ddfcab6abe7be0f24ea0d..7121a7b5de4b2c819e45c7676f05cce756bdebed 100644 --- a/hysop/core/memory/memory_request.py +++ b/hysop/core/memory/memory_request.py @@ -305,7 +305,7 @@ class MultipleOperatorMemoryRequests(object): if allow_subbuffers: data = backend.empty(shape=(total_bytes,), dtype=np.uint8) - for op,requests in op_requests.iteritems(): + for (op,requests) in op_requests.iteritems(): check_instance(requests, list, values=MemoryRequest) start_idx, end_idx = 0, 0 for req in requests: @@ -346,17 +346,52 @@ class MultipleOperatorMemoryRequests(object): views[op][req.id] = tuple(req_views) assert end_idx <= total_bytes else: + buffer_sizes = [] + ordered_requests = {} for (op,requests) in op_requests.iteritems(): + assert op not in ordered_requests check_instance(requests, list, values=MemoryRequest) + op_buffer_sizes = () + op_reqs = () for req in requests: - req_views = [] + nbytes = req.data_bytes_per_component() for i in xrange(req.nb_components): - view = backend.empty(shape=req.shape, dtype=req.dtype) - req_views.append(view) - if (op not in views): - views[op] = {} - if req.nb_components>=1: - views[op][req.id] = tuple(req_views) + op_buffer_sizes += (nbytes,) + op_reqs += (req,) + idx = np.argsort(op_buffer_sizes)[::-1] + op_buffer_sizes = tuple(op_buffer_sizes[i] for i in idx) + op_sorted_reqs = tuple(op_reqs[i] for i in idx) + for (i,size) in enumerate(op_buffer_sizes[::-1]): + if (i>=len(buffer_sizes)): + buffer_sizes.append(size) + else: + buffer_sizes[i] = max(buffer_sizes[i], size) + ordered_requests[op] = op_sorted_reqs + + nbuffers = len(buffer_sizes) + if (nbuffers == 0): + return + + buffers = tuple(backend.empty(shape=(nbytes,), dtype=np.uint8) + for nbytes in buffer_sizes) + + for (op, requests) in ordered_requests.iteritems(): + assert len(buffers)>= len(requests) + views.setdefault(op, {}) + old_req = None + for (buf, req) in zip(buffers, requests): + if (req != old_req): + if (old_req is not None): + assert old_req.id not in views[op] + views[op][old_req.id] = tuple(req_views) + req_views = [] + nbytes = req.data_bytes_per_component() + assert buf.size >= nbytes + view = buf[:nbytes].view(dtype=req.dtype).reshape(req.shape) + req_views.append(view) + old_req = req + assert old_req.id not in views[op] + views[op][old_req.id] = tuple(req_views) def get_buffer(self, operator, request_identifier, handle=False): if not self._allocated: diff --git a/hysop/problem.py b/hysop/problem.py index 9f097cbd47d1d5789f8b808b496338f5498ea3f2..8b30e168ebc2a688824f59fe9c62a302be8cda0c 100644 --- a/hysop/problem.py +++ b/hysop/problem.py @@ -16,7 +16,7 @@ class Problem(ComputationalGraph): self.push_nodes(*ops) @debug - def build(self): + def build(self, allow_subbuffers=False): with Timer() as tm: vprint('\nInitializing problem...') self.initialize(outputs_are_inputs=True, topgraph_method=None) @@ -25,7 +25,7 @@ class Problem(ComputationalGraph): vprint('\nGetting work properties...') work = self.get_work_properties() vprint('\nAllocating work...') - work.allocate(allow_subbuffers=True) + work.allocate(allow_subbuffers=allow_subbuffers) vprint('\nSetting up problem...') self.setup(work) msg=' Problem building took {} ({}s) '