From 9344740c56fa64d9b0e2d073cbb375ae5aada9fb Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Keck <Jean-Baptiste.Keck@imag.fr> Date: Mon, 26 Sep 2016 18:24:32 +0200 Subject: [PATCH] advection update --- hysop/gpu/gpu_kernel.py | 5 +- hysop/gpu/gpu_particle_advection.py | 37 ++-- hysop/gpu/static_gpu_particle_advection.py | 202 +++++++++++---------- 3 files changed, 132 insertions(+), 112 deletions(-) diff --git a/hysop/gpu/gpu_kernel.py b/hysop/gpu/gpu_kernel.py index aa8d50e32..4452e3804 100644 --- a/hysop/gpu/gpu_kernel.py +++ b/hysop/gpu/gpu_kernel.py @@ -2,7 +2,7 @@ @file gpu_kernel.py """ from hysop.constants import debug, S_DIR -from hysop import __VERBOSE__ +from hysop import __VERBOSE__, __DEBUG__ from hysop.gpu import cl, CL_PROFILE from hysop.tools.profiler import FProfiler @@ -71,6 +71,9 @@ class KernelListLauncher(object): @param args : kernel arguments. @return OpenCL Event. """ + # if __DEBUG__: + # print "-- FLUSHING OPENCL QUEUE --" + # self.queue.flush() if __VERBOSE__: try: print "OpenCL kernel:", self.kernel[d].function_name, d, args[0], args[1] diff --git a/hysop/gpu/gpu_particle_advection.py b/hysop/gpu/gpu_particle_advection.py index ec1aa8aa4..09548bffa 100644 --- a/hysop/gpu/gpu_particle_advection.py +++ b/hysop/gpu/gpu_particle_advection.py @@ -345,31 +345,35 @@ class GPUParticleAdvection(DiscreteOperator, GPUOperator): self.size_global_alloc += self.velocity.mem_size # Fields on grids - for field in self.fields_on_grid: + for fg in self.fields_on_grid: GPUDiscreteField.fromField(self.cl_env, - field, + fg, self.gpu_precision, layout=False) - if field.allocate(): - self.size_global_alloc += field.mem_size + if fg.allocate(): + self.size_global_alloc += fg.mem_size # Fields on particles self.fields_on_part = {} start = 0 - for f in self.fields_on_grid: - for i in xrange(start, start + f.nb_components): + for fg in self.fields_on_grid: + for i in xrange(start, start + fg.nb_components): if type(self._rwork[i]) is np.ndarray: self._rwork[i] = \ 
self.cl_env.global_allocation(self._rwork[i]) - self.fields_on_part[f] = self._rwork[start: start + f.nb_components] - start += f.nb_components + self.fields_on_part[fg] = self._rwork[start: start + fg.nb_components] + start += fg.nb_components - # Particles position (if kernel is splitted) + # Particles positions (only used if advection and remesh kernels are splitted) if self._split_kernels: if type(self._rwork[start]) is np.ndarray: self._rwork[start] = \ self.cl_env.global_allocation(self._rwork[start]) self._particle_position = self._rwork[start] + start += 1 + + if start != len(self._rwork): + raise RuntimeError('GPU allocation error!') def _device_buffer_initializations(self): """ @@ -488,8 +492,8 @@ class GPUParticleAdvection(DiscreteOperator, GPUOperator): def _initialize_events(self): # Particle initialisation OpenCL events for each field - for f in self.fields_on_grid: - self._field_events = {f: []} + for fg in self.fields_on_grid: + self._field_events = {fg: []} ## ## Discrete operator interface @@ -526,25 +530,24 @@ class GPUParticleAdvection(DiscreteOperator, GPUOperator): if (step_id<0) or (step_id>=len(self._exec_list)): raise ValueError('step_id is out of bounds.') - # If first direction of advection, wait for work gpu fields - # It avoids wait_for lists to increase indefinitely - # In practice, all events are terminated so wait() resets events list + # in first step, synchronize previous work on gpu fields and clean events if step_id == 0: for v in self.variables: v.clean_events() - exec_list = self._exec_list[step_id] - parameters = self._param_list[step_id] + import copy + parameters = copy.deepcopy(self._param_list[step_id]) parameters.update(extra_params) my_dir = self.direction req_dir = parameters['dir'] if my_dir != req_dir: msg = 'Discrete splitting operator called for wrong direction, \ -splitting step id {} requires direction {} but applied operator \ +splitting step id {} requires direction {} but currently applied operator \ direction 
is {}!'.format(step_id,MeshDir[req_dir],MeshDir[my_dir]) raise RuntimeError(msg) + exec_list = self._exec_list[step_id] for exe in exec_list: exe(simulation=simulation, **parameters) diff --git a/hysop/gpu/static_gpu_particle_advection.py b/hysop/gpu/static_gpu_particle_advection.py index 650a7ed1e..6c6cb20d4 100644 --- a/hysop/gpu/static_gpu_particle_advection.py +++ b/hysop/gpu/static_gpu_particle_advection.py @@ -31,20 +31,29 @@ class StaticGPUParticleAdvection(GPUParticleAdvection): def _check(self): cls = self.__class__ super(cls,self)._check() + cls = cls.__name__ - if len(self.fields_on_grid)>1: - msg='{} kernel generation backend does only support'.format(cls) \ - + ' one advected field at a time!' - raise NotImplementedError(msg) + # if len(self.fields_on_grid)>1: + # msg='{} kernel generation backend does only support'.format(cls) \ + # + ' one advected field at a time!' + # raise NotImplementedError(msg) + if self.dim not in [2,3]: + msg='{} only supports 2D and 3D domains.'.format(cls) + raise NotImplementedError(msg) + _kernel_cfg = self._kernel_cfg if _kernel_cfg is None: msg='{} requires a kernel configuration (_kernel_cfg) to be present !'\ .format(cls) raise ValueError(msg) - required_kernel_configs = \ - ['transpose_xy','transpose_xz','advec','remesh','advec_and_remesh'] + required_kernel_configs = ['advec','remesh','advec_and_remesh'] + if self.dim>=2: + required_kernel_configs.append('transpose_xy') + if self.dim>=3: + required_kernel_configs.append('transpose_xz') + for rkc in required_kernel_configs: msg = "{} requires kernel configuration '{}' but it is not present.".format(cls, rkc) if rkc not in _kernel_cfg.keys(): @@ -54,6 +63,12 @@ class StaticGPUParticleAdvection(GPUParticleAdvection): self._initialize_cl_build_options() self._initialize_cl_size_constants() self._initialize_cl_mesh_info() + + required_components = set() + for fg in self.fields_on_grid: + required_components.add(fg.nb_components) + self.required_components = required_components + 
def _initialize_cl_build_options(self): self._build_options += "" @@ -219,7 +234,7 @@ class StaticGPUParticleAdvection(GPUParticleAdvection): # Advection build_options = self._build_options + self._size_constants src, is_noBC, vec, f_space = self._kernel_cfg['advec'] - gwi, lwi = f_space(self.v_resol_dir, vec) + gwi, lwi = f_space(self.f_resol_dir, vec) WINb = lwi[0] if self._is_multi_scale: @@ -230,7 +245,7 @@ class StaticGPUParticleAdvection(GPUParticleAdvection): build_options += " -D WITH_NOBC=1" build_options += " -D WI_NB=" + str(WINb) build_options += " -D PART_NB_PER_WI=" - build_options += str(self.v_resol_dir[0] / WINb) + build_options += str(self.f_resol_dir[0] / WINb) # Build code src = [s.replace('RKN', self.method[TimeIntegrator].__name__.lower()) @@ -245,21 +260,23 @@ class StaticGPUParticleAdvection(GPUParticleAdvection): if not self._isMultiScale: src = [s for s in src if s.find(Euler.__name__.lower()) < 0] src[-1] = src[-1].replace('advection', 'advection_euler') - # self._compute_advec = self._compute_advec_euler_simpleechelle - prg = self.cl_env.build_src( - src, - build_options, - vec, - nb_remesh_components=self.fields_on_grid[0].nb_components) - self._advec = KernelLauncher( - prg.advection_kernel, self.cl_env.queue, gwi, lwi) + self._advec = {} + for rc in self.required_components: + prg = self.cl_env.build_src( + src, + build_options, + vec, + nb_remesh_components=rc) + + self._advec[rc] = KernelLauncher( + prg.advection_kernel, self.cl_env.queue, gwi, lwi) def _collect_remesh_kernel(self): # remeshing build_options = self._build_options + self._size_constants src, is_noBC, vec, f_space = self._kernel_cfg['remesh'] - gwi, lwi = f_space(self.v_resol_dir, vec) + gwi, lwi = f_space(self.f_resol_dir, vec) WINb = lwi[0] build_options += " -D FORMULA=" + self.method[Remesh].__name__.upper() @@ -267,23 +284,24 @@ class StaticGPUParticleAdvection(GPUParticleAdvection): build_options += " -D WITH_NOBC=1" build_options += " -D WI_NB=" + str(WINb) 
build_options += " -D PART_NB_PER_WI=" - build_options += str(self.v_resol_dir[0] / WINb) + build_options += str(self.f_resol_dir[0] / WINb) - prg = self.cl_env.build_src( - src, build_options, vec, - nb_remesh_components=self.fields_on_grid[0].nb_components) - self._remesh = KernelLauncher( - prg.remeshing_kernel, self.cl_env.queue, gwi, lwi) + self._remesh = {} + for rc in self.required_components: + prg = self.cl_env.build_src( + src, build_options, vec, + nb_remesh_components=rc) + self._remesh[rc] = KernelLauncher( + prg.remeshing_kernel, self.cl_env.queue, gwi, lwi) def _collect_advec_remesh_kernel(self): """ Compile OpenCL sources for advection and remeshing kernel. """ build_options = self._build_options + self._size_constants - resol_dir = self._reorder_vect(self.velocity_topo.mesh.resolution) src, is_noBC, vec, f_space = self._kernel_cfg['advec_and_remesh'] - gwi, lwi = f_space(resol_dir, vec) + gwi, lwi = f_space(self.f_resol_dir, vec) WINb = lwi[0] build_options += " -D FORMULA=" + self.method[Remesh].__name__.upper() @@ -295,7 +313,7 @@ class StaticGPUParticleAdvection(GPUParticleAdvection): build_options += " -D WITH_NOBC=1" build_options += " -D WI_NB=" + str(WINb) - build_options += " -D PART_NB_PER_WI=" + str(resol_dir[0] / WINb) + build_options += " -D PART_NB_PER_WI=" + str(self.f_resol_dir[0] / WINb) # Build code src = [s.replace('RKN', self.method[TimeIntegrator].__name__.lower()) @@ -306,13 +324,14 @@ class StaticGPUParticleAdvection(GPUParticleAdvection): if not self._is_multiScale: src = [s for s in src if s.find(Euler.__name__.lower()) < 0] src[-1] = src[-1].replace('advection', 'advection_euler') - - prg = self.cl_env.build_src( - src, build_options, vec, - nb_remesh_components=self.fields_on_grid[0].nb_components) - - self._advec_and_remesh = KernelLauncher( - prg.advection_and_remeshing, self.cl_env.queue, gwi, lwi) + + self._advec_and_remesh = {} + for rc in self.required_components: + prg = self.cl_env.build_src( + src, build_options, 
vec, + nb_remesh_components=rc) + self._advec_and_remesh[rc] = KernelLauncher( + prg.advection_and_remeshing, self.cl_env.queue, gwi, lwi) def _collect_user_kernels(self): """ @@ -327,26 +346,26 @@ class StaticGPUParticleAdvection(GPUParticleAdvection): f_resol_dir = self.f_resol_dir v_resol_dir = self.v_resol_dir - workItemNb, gwi, lwi = self.cl_env.get_work_items(resol_dir) - v_workItemNb, gwi, lwi = self.cl_env.get_work_items(v_resol_dir) + f_work_items, gwi, lwi = self.cl_env.get_work_items(f_resol_dir) + v_work_items, gwi, lwi = self.cl_env.get_work_items(v_resol_dir) - build_options += " -D WI_NB=" + str(workItemNb) - build_options += " -D V_WI_NB=" + str(v_workItemNb) + build_options += " -D WI_NB=" + str(f_work_items) + build_options += " -D V_WI_NB=" + str(v_work_items) self._user_prg = self.cl_env.build_src(usr_src, build_options, 1) else: self._user_prg = None def _exec_kernel(self,kernel): - fg0 = self.fields_on_grid[0] - fp0 = self.fields_on_part[fg0] - - for (g,p) in zip(fg0.gpu_data,fp0): - if kernel is self.copy: - evt = kernel.launch_sizes_in_args(p,g,wait_for=fg0.events) - else: - evt = kernel(g,p,wait_for=fg0.events) - self._field_events[fg0].append(evt) + for (fg,fp) in self.fields_on_part.iteritems(): + evts = [] + for (g,p) in zip(fg.gpu_data,fp): + if kernel is self.copy: + evt = kernel.launch_sizes_in_args(p,g,wait_for=fg.events) + else: + evt = kernel(g,p,wait_for=fg.events) + evts.append(evt) + fg.events += evts def _do_copy(self,**kargs): self._exec_kernel(self.copy) @@ -360,54 +379,49 @@ class StaticGPUParticleAdvection(GPUParticleAdvection): self._exec_kernel(self.transpose_zx) def _do_compute_1k_monoscale(self, dt): - velocity = self.velocity - fg0 = self.fields_on_grid[0] - fp0 = self.fields_on_part[fg0] - nbc = fg0.nb_components - - wait_evts = self.velocity.events + self._field_events[fg0] + fg0.events - - args = tuple( - [velocity.gpu_data[self.direction]] - + [fp0[i] for i in xrange(nbc)] - + [fg0.gpu_data[i] for i in xrange(nbc)] 
- + [self.gpu_precision(dt), self._cl_mesh_info] - ) - evt = self._advec_and_remesh(*args, wait_for=wait_evts) - - fg0.events.append(evt) - self._field_events[fg0] = [] + velocity = self.velocity + for (fg,fp) in self.fields_on_part.iteritems(): + nbc = fg.nb_components + wait_evts = velocity.events + fg.events + + args = tuple( + [velocity.gpu_data[self.direction]] + + [fp[i] for i in xrange(nbc)] + + [fg.gpu_data[i] for i in xrange(nbc)] + + [self.gpu_precision(dt)] + + [self._cl_mesh_info] + ) + + evt = self._advec_and_remesh[nbc](*args, wait_for=wait_evts) + fg.events.append(evt) def _do_compute_2k_monoscale(self, dt): - velocity = self.velocity - fg0 = self.fields_on_grid[0] - fp0 = self.fields_on_part[fg0] - nbc = fg0.nb_components - # Advection - wait_evts = velocity.events + self._field_events[fg0] - args = tuple([ - velocity.gpu_data[self.direction], - self._particle_position, - self.gpu_precision(dt), - self._cl_mesh_info - ]) - evt = self._advec(*args,wait_for=wait_evts) - self._field_events[fg0].append(evt) - - # Remesh - wait_evts = self._field_events[fg0] + fg0.events - args = tuple( - [self._particle_position] - + [fp0[i] for i in xrange(nbc)] - + [fg0.gpu_data[i] for i in xrange(nbc)] - + [self._cl_mesh_info] - ) - evt = self._remesh(*args, wait_for=wait_evts) - fg0.events.append(evt) - - self._field_events[fg0] = [] + velocity = self.velocity + for (fg,fp) in self.fields_on_part.iteritems(): + nbc = fg.nb_components + wait_evts = velocity.events + fg.events + + # Advection + args = tuple([ + velocity.gpu_data[self.direction], + self._particle_position, + self.gpu_precision(dt), + self._cl_mesh_info + ]) + advec_evt = self._advec[nbc](*args,wait_for=wait_evts) + fg.events.append(advec_evt) + + # Remesh + args = tuple( + [self._particle_position] + + [fp[i] for i in xrange(nbc)] + + [fg.gpu_data[i] for i in xrange(nbc)] + + [self._cl_mesh_info] + ) + remesh_evt = self._remesh[nbc](*args, wait_for=[advec_evt]) + fg.events.append(remesh_evt) 
@staticmethod def supports_multiscale(): @@ -453,9 +467,9 @@ if __name__=='__main__': return init - GHOSTS = 3 - NSCALARS = 0 - resolution= [33,65,129][:dim] + GHOSTS = 0 + NSCALARS = 2 + resolution= [33,33,33][:dim] ghosts = [GHOSTS]*dim d3d = Discretization(resolution=resolution, ghosts=None) d3dg = Discretization(resolution=resolution, ghosts=ghosts) @@ -514,7 +528,7 @@ if __name__=='__main__': A[2].apply(simu,2) A[1].apply(simu,3) A[0].apply(simu,4) - + for v in A[0].variables: v.to_host() -- GitLab