From 9344740c56fa64d9b0e2d073cbb375ae5aada9fb Mon Sep 17 00:00:00 2001
From: Jean-Baptiste Keck <Jean-Baptiste.Keck@imag.fr>
Date: Mon, 26 Sep 2016 18:24:32 +0200
Subject: [PATCH] advection update

---
 hysop/gpu/gpu_kernel.py                    |   5 +-
 hysop/gpu/gpu_particle_advection.py        |  37 ++--
 hysop/gpu/static_gpu_particle_advection.py | 202 +++++++++++----------
 3 files changed, 132 insertions(+), 112 deletions(-)

diff --git a/hysop/gpu/gpu_kernel.py b/hysop/gpu/gpu_kernel.py
index aa8d50e32..4452e3804 100644
--- a/hysop/gpu/gpu_kernel.py
+++ b/hysop/gpu/gpu_kernel.py
@@ -2,7 +2,7 @@
 @file gpu_kernel.py
 """
 from hysop.constants import debug, S_DIR
-from hysop import __VERBOSE__
+from hysop import __VERBOSE__, __DEBUG__
 from hysop.gpu import cl, CL_PROFILE
 from hysop.tools.profiler import FProfiler
 
@@ -71,6 +71,9 @@ class KernelListLauncher(object):
         @param args : kernel arguments.
         @return OpenCL Event.
         """
+        # if __DEBUG__:
+            # print "-- FLUSHING OPENCL QUEUE --"
+            # self.queue.flush()
         if __VERBOSE__:
             try:
                 print "OpenCL kernel:", self.kernel[d].function_name, d, args[0], args[1]
diff --git a/hysop/gpu/gpu_particle_advection.py b/hysop/gpu/gpu_particle_advection.py
index ec1aa8aa4..09548bffa 100644
--- a/hysop/gpu/gpu_particle_advection.py
+++ b/hysop/gpu/gpu_particle_advection.py
@@ -345,31 +345,35 @@ class GPUParticleAdvection(DiscreteOperator, GPUOperator):
             self.size_global_alloc += self.velocity.mem_size
         
         # Fields on grids
-        for field in self.fields_on_grid:
+        for fg in self.fields_on_grid:
             GPUDiscreteField.fromField(self.cl_env,
-                                       field,
+                                       fg,
                                        self.gpu_precision,
                                        layout=False)
-            if field.allocate():
-                self.size_global_alloc += field.mem_size
+            if fg.allocate():
+                self.size_global_alloc += fg.mem_size
 
         # Fields on particles
         self.fields_on_part = {}
         start = 0
-        for f in self.fields_on_grid:
-            for i in xrange(start, start + f.nb_components):
+        for fg in self.fields_on_grid:
+            for i in xrange(start, start + fg.nb_components):
                 if type(self._rwork[i]) is np.ndarray:
                     self._rwork[i] = \
                         self.cl_env.global_allocation(self._rwork[i])
-            self.fields_on_part[f] = self._rwork[start: start + f.nb_components]
-            start += f.nb_components
+            self.fields_on_part[fg] = self._rwork[start: start + fg.nb_components]
+            start += fg.nb_components
 
-        # Particles position (if kernel is splitted)
+        # Particles positions (only used if advection and remesh kernels are split)
         if self._split_kernels:
             if type(self._rwork[start]) is np.ndarray:
                 self._rwork[start] = \
                     self.cl_env.global_allocation(self._rwork[start])
             self._particle_position = self._rwork[start]
+            start += 1
+            
+        if start != len(self._rwork):
+            raise RuntimeError('GPU allocation error!')
     
     def _device_buffer_initializations(self):
         """
@@ -488,8 +492,8 @@ class GPUParticleAdvection(DiscreteOperator, GPUOperator):
 
     def _initialize_events(self):
         # Particle initialisation OpenCL events for each field
-        for f in self.fields_on_grid:
-            self._field_events = {f: []}
+        for fg in self.fields_on_grid:
+            self._field_events = {fg: []}
     
 ## 
 ## Discrete operator interface
@@ -526,25 +530,24 @@ class GPUParticleAdvection(DiscreteOperator, GPUOperator):
         if (step_id<0) or (step_id>=len(self._exec_list)): 
             raise ValueError('step_id is out of bounds.')
 
-        # If first direction of advection, wait for work gpu fields
-        # It avoids wait_for lists to increase indefinitely
-        # In practice, all events are terminated so wait() resets events list
+        # In the first step, synchronize previous work on GPU fields and clean events
         if step_id == 0:
             for v in self.variables:
                 v.clean_events()
     
-        exec_list  = self._exec_list[step_id]
-        parameters = self._param_list[step_id]
+        import copy
+        parameters = copy.deepcopy(self._param_list[step_id])
         parameters.update(extra_params)
        
         my_dir  = self.direction
         req_dir = parameters['dir']
         if my_dir != req_dir:
             msg = 'Discrete splitting operator called for wrong direction, \
-splitting step id {} requires direction {} but applied operator \
+splitting step id {} requires direction {} but currently applied operator \
 direction is {}!'.format(step_id,MeshDir[req_dir],MeshDir[my_dir])
             raise RuntimeError(msg)
 
+        exec_list = self._exec_list[step_id]
         for exe in exec_list:
             exe(simulation=simulation, **parameters)
     
diff --git a/hysop/gpu/static_gpu_particle_advection.py b/hysop/gpu/static_gpu_particle_advection.py
index 650a7ed1e..6c6cb20d4 100644
--- a/hysop/gpu/static_gpu_particle_advection.py
+++ b/hysop/gpu/static_gpu_particle_advection.py
@@ -31,20 +31,29 @@ class StaticGPUParticleAdvection(GPUParticleAdvection):
     def _check(self):
         cls = self.__class__
         super(cls,self)._check()
+        cls = cls.__name__
         
-        if len(self.fields_on_grid)>1:
-            msg='{} kernel generation backend does only support'.format(cls) \
-                + ' one advected field at a time!'
-            raise NotImplementedError(msg)
+        # if len(self.fields_on_grid)>1:
+            # msg='{} kernel generation backend does only support'.format(cls) \
+                # + ' one advected field at a time!'
+            # raise NotImplementedError(msg)
     
+        if self.dim not in [2,3]:
+            msg='{} only supports 2D and 3D domains.'.format(cls)
+            raise NotImplementedError(msg)
+
         _kernel_cfg = self._kernel_cfg
         if _kernel_cfg is None:
             msg='{} requires a kernel configuration (_kernel_cfg) to be present !'\
                     .format(cls)
             raise ValueError(msg)
 
-        required_kernel_configs = \
-                ['transpose_xy','transpose_xz','advec','remesh','advec_and_remesh']
+        required_kernel_configs = ['advec','remesh','advec_and_remesh']
+        if self.dim>=2: 
+            required_kernel_configs.append('transpose_xy')
+        if self.dim>=3: 
+            required_kernel_configs.append('transpose_xz')
+
         for rkc in required_kernel_configs:
             msg = "{} requires kernel configuration '{}' but it is not present.".format(cls, rkc)
             if rkc not in _kernel_cfg.keys():
@@ -54,6 +63,12 @@ class StaticGPUParticleAdvection(GPUParticleAdvection):
         self._initialize_cl_build_options()
         self._initialize_cl_size_constants()
         self._initialize_cl_mesh_info()
+        
+        required_components = set()
+        for fg in self.fields_on_grid:
+            required_components.add(fg.nb_components)
+        self.required_components = required_components
+        
 
     def _initialize_cl_build_options(self):
         self._build_options += ""
@@ -219,7 +234,7 @@ class StaticGPUParticleAdvection(GPUParticleAdvection):
         # Advection
         build_options = self._build_options + self._size_constants
         src, is_noBC, vec, f_space = self._kernel_cfg['advec']
-        gwi, lwi = f_space(self.v_resol_dir, vec)
+        gwi, lwi = f_space(self.f_resol_dir, vec)
         WINb = lwi[0]
 
         if self._is_multi_scale:
@@ -230,7 +245,7 @@ class StaticGPUParticleAdvection(GPUParticleAdvection):
             build_options += " -D WITH_NOBC=1"
         build_options += " -D WI_NB=" + str(WINb)
         build_options += " -D PART_NB_PER_WI="
-        build_options += str(self.v_resol_dir[0] / WINb)
+        build_options += str(self.f_resol_dir[0] / WINb)
         
         # Build code
         src = [s.replace('RKN', self.method[TimeIntegrator].__name__.lower())
@@ -245,21 +260,23 @@ class StaticGPUParticleAdvection(GPUParticleAdvection):
             if not self._isMultiScale:
                 src = [s for s in src if s.find(Euler.__name__.lower()) < 0]
                 src[-1] = src[-1].replace('advection', 'advection_euler')
-                # self._compute_advec = self._compute_advec_euler_simpleechelle
-        prg = self.cl_env.build_src(
-            src,
-            build_options,
-            vec,
-            nb_remesh_components=self.fields_on_grid[0].nb_components)
 
-        self._advec = KernelLauncher(
-            prg.advection_kernel, self.cl_env.queue, gwi, lwi)
+        self._advec = {}
+        for rc in self.required_components:
+            prg = self.cl_env.build_src(
+                src,
+                build_options,
+                vec,
+                nb_remesh_components=rc)
+
+            self._advec[rc] = KernelLauncher(
+                prg.advection_kernel, self.cl_env.queue, gwi, lwi)
 
     def _collect_remesh_kernel(self):
         # remeshing
         build_options = self._build_options + self._size_constants
         src, is_noBC, vec, f_space = self._kernel_cfg['remesh']
-        gwi, lwi = f_space(self.v_resol_dir, vec)
+        gwi, lwi = f_space(self.f_resol_dir, vec)
         WINb = lwi[0]
 
         build_options += " -D FORMULA=" + self.method[Remesh].__name__.upper()
@@ -267,23 +284,24 @@ class StaticGPUParticleAdvection(GPUParticleAdvection):
             build_options += " -D WITH_NOBC=1"
         build_options += " -D WI_NB=" + str(WINb)
         build_options += " -D PART_NB_PER_WI="
-        build_options += str(self.v_resol_dir[0] / WINb)
+        build_options += str(self.f_resol_dir[0] / WINb)
 
-        prg = self.cl_env.build_src(
-            src, build_options, vec,
-            nb_remesh_components=self.fields_on_grid[0].nb_components)
-        self._remesh = KernelLauncher(
-            prg.remeshing_kernel, self.cl_env.queue, gwi, lwi)
+        self._remesh = {}
+        for rc in self.required_components:
+            prg = self.cl_env.build_src(
+                src, build_options, vec,
+                nb_remesh_components=rc)
+            self._remesh[rc] = KernelLauncher(
+                prg.remeshing_kernel, self.cl_env.queue, gwi, lwi)
 
     def _collect_advec_remesh_kernel(self):
         """
         Compile OpenCL sources for advection and remeshing kernel.
         """
         build_options = self._build_options + self._size_constants
-        resol_dir = self._reorder_vect(self.velocity_topo.mesh.resolution)
 
         src, is_noBC, vec, f_space = self._kernel_cfg['advec_and_remesh']
-        gwi, lwi = f_space(resol_dir, vec)
+        gwi, lwi = f_space(self.f_resol_dir, vec)
 
         WINb = lwi[0]
         build_options += " -D FORMULA=" + self.method[Remesh].__name__.upper()
@@ -295,7 +313,7 @@ class StaticGPUParticleAdvection(GPUParticleAdvection):
             build_options += " -D WITH_NOBC=1"
 
         build_options += " -D WI_NB=" + str(WINb)
-        build_options += " -D PART_NB_PER_WI=" + str(resol_dir[0] / WINb)
+        build_options += " -D PART_NB_PER_WI=" + str(self.f_resol_dir[0] / WINb)
         
         # Build code
         src = [s.replace('RKN', self.method[TimeIntegrator].__name__.lower())
@@ -306,13 +324,14 @@ class StaticGPUParticleAdvection(GPUParticleAdvection):
             if not self._is_multiScale:
                 src = [s for s in src if s.find(Euler.__name__.lower()) < 0]
                 src[-1] = src[-1].replace('advection', 'advection_euler')
-
-        prg = self.cl_env.build_src(
-            src, build_options, vec,
-            nb_remesh_components=self.fields_on_grid[0].nb_components)
-
-        self._advec_and_remesh = KernelLauncher(
-            prg.advection_and_remeshing, self.cl_env.queue, gwi, lwi)
+        
+        self._advec_and_remesh = {}
+        for rc in self.required_components:
+            prg = self.cl_env.build_src(
+                src, build_options, vec,
+                nb_remesh_components=rc)
+            self._advec_and_remesh[rc] = KernelLauncher(
+                prg.advection_and_remeshing, self.cl_env.queue, gwi, lwi)
        
     def _collect_user_kernels(self):
         """
@@ -327,26 +346,26 @@ class StaticGPUParticleAdvection(GPUParticleAdvection):
             f_resol_dir = self.f_resol_dir 
             v_resol_dir = self.v_resol_dir
             
-            workItemNb, gwi, lwi   = self.cl_env.get_work_items(resol_dir)
-            v_workItemNb, gwi, lwi = self.cl_env.get_work_items(v_resol_dir)
+            f_work_items, gwi, lwi = self.cl_env.get_work_items(f_resol_dir)
+            v_work_items, gwi, lwi = self.cl_env.get_work_items(v_resol_dir)
 
-            build_options += " -D WI_NB="   + str(workItemNb)
-            build_options += " -D V_WI_NB=" + str(v_workItemNb)
+            build_options += " -D WI_NB="   + str(f_work_items)
+            build_options += " -D V_WI_NB=" + str(v_work_items)
 
             self._user_prg = self.cl_env.build_src(usr_src, build_options, 1)
         else:
             self._user_prg = None
         
     def _exec_kernel(self,kernel):
-        fg0 = self.fields_on_grid[0]
-        fp0 = self.fields_on_part[fg0]
-
-        for (g,p) in zip(fg0.gpu_data,fp0):
-            if kernel is self.copy:
-                evt = kernel.launch_sizes_in_args(p,g,wait_for=fg0.events)
-            else:
-                evt = kernel(g,p,wait_for=fg0.events)
-            self._field_events[fg0].append(evt)
+        for (fg,fp) in self.fields_on_part.iteritems():
+            evts = []
+            for (g,p) in zip(fg.gpu_data,fp):
+                if kernel is self.copy:
+                    evt = kernel.launch_sizes_in_args(p,g,wait_for=fg.events)
+                else:
+                    evt = kernel(g,p,wait_for=fg.events)
+                evts.append(evt)
+            fg.events += evts
         
     def _do_copy(self,**kargs): 
         self._exec_kernel(self.copy)
@@ -360,54 +379,49 @@ class StaticGPUParticleAdvection(GPUParticleAdvection):
         self._exec_kernel(self.transpose_zx)
     
     def _do_compute_1k_monoscale(self, dt):
-        velocity  = self.velocity
-        fg0       = self.fields_on_grid[0]
-        fp0       = self.fields_on_part[fg0]
-        nbc = fg0.nb_components
-        
-        wait_evts = self.velocity.events + self._field_events[fg0] + fg0.events
-
-        args = tuple(
-              [velocity.gpu_data[self.direction]] 
-            + [fp0[i]          for i in xrange(nbc)] 
-            + [fg0.gpu_data[i] for i in xrange(nbc)] 
-            + [self.gpu_precision(dt), self._cl_mesh_info]
-        )
 
-        evt = self._advec_and_remesh(*args, wait_for=wait_evts)
-
-        fg0.events.append(evt)
-        self._field_events[fg0] = []
+        velocity  = self.velocity
+        for (fg,fp) in self.fields_on_part.iteritems():
+            nbc = fg.nb_components
+            wait_evts = velocity.events + fg.events
+
+            args = tuple(
+                  [velocity.gpu_data[self.direction]] 
+                + [fp[i]          for i in xrange(nbc)] 
+                + [fg.gpu_data[i] for i in xrange(nbc)] 
+                + [self.gpu_precision(dt)]
+                + [self._cl_mesh_info]
+            )
+
+            evt = self._advec_and_remesh[nbc](*args, wait_for=wait_evts)
+            fg.events.append(evt)
     
     def _do_compute_2k_monoscale(self, dt):
-        velocity  = self.velocity
-        fg0       = self.fields_on_grid[0]
-        fp0       = self.fields_on_part[fg0]
-        nbc = fg0.nb_components
 
-        # Advection
-        wait_evts = velocity.events + self._field_events[fg0]
-        args = tuple([
-            velocity.gpu_data[self.direction],
-            self._particle_position,
-            self.gpu_precision(dt),
-            self._cl_mesh_info
-        ])
-        evt = self._advec(*args,wait_for=wait_evts)
-        self._field_events[fg0].append(evt)
-
-        # Remesh
-        wait_evts = self._field_events[fg0] + fg0.events
-        args = tuple(
-              [self._particle_position]
-            + [fp0[i]          for i in xrange(nbc)]
-            + [fg0.gpu_data[i] for i in xrange(nbc)]
-            + [self._cl_mesh_info]
-        )
-        evt = self._remesh(*args, wait_for=wait_evts)
-        fg0.events.append(evt)
-
-        self._field_events[fg0] = []
+        velocity  = self.velocity
+        for (fg,fp) in self.fields_on_part.iteritems():
+            nbc = fg.nb_components
+            wait_evts = velocity.events + fg.events
+
+            # Advection
+            args = tuple([
+                velocity.gpu_data[self.direction],
+                self._particle_position,
+                self.gpu_precision(dt),
+                self._cl_mesh_info
+            ])
+            advec_evt = self._advec[nbc](*args,wait_for=wait_evts)
+            fg.events.append(advec_evt)
+
+            # Remesh
+            args = tuple(
+                  [self._particle_position]
+                + [fp[i]          for i in xrange(nbc)]
+                + [fg.gpu_data[i] for i in xrange(nbc)]
+                + [self._cl_mesh_info]
+            )
+            remesh_evt = self._remesh[nbc](*args, wait_for=[advec_evt])
+            fg.events.append(remesh_evt)
     
     @staticmethod
     def supports_multiscale():
@@ -453,9 +467,9 @@ if __name__=='__main__':
         return init
 
     
-    GHOSTS    = 3
-    NSCALARS  = 0
-    resolution= [33,65,129][:dim]
+    GHOSTS    = 0
+    NSCALARS  = 2
+    resolution= [33,33,33][:dim]
     ghosts    = [GHOSTS]*dim
     d3d       = Discretization(resolution=resolution, ghosts=None)
     d3dg      = Discretization(resolution=resolution, ghosts=ghosts)
@@ -514,7 +528,7 @@ if __name__=='__main__':
             A[2].apply(simu,2)
             A[1].apply(simu,3)
             A[0].apply(simu,4)
-    
+
     for v in A[0].variables:
         v.to_host()
     
-- 
GitLab