From 78938143da029dceeccdc3e5dc25bbbe53dce0dc Mon Sep 17 00:00:00 2001
From: Jean-Baptiste Keck <Jean-Baptiste.Keck@imag.fr>
Date: Fri, 23 Dec 2016 16:09:23 +0100
Subject: [PATCH] working advection, periodic non cached

---
 .../codegen/kernels/directional_advection.py  | 81 ++++++++++++-------
 1 file changed, 53 insertions(+), 28 deletions(-)

diff --git a/hysop/codegen/kernels/directional_advection.py b/hysop/codegen/kernels/directional_advection.py
index ada57d960..c2f36e6fd 100644
--- a/hysop/codegen/kernels/directional_advection.py
+++ b/hysop/codegen/kernels/directional_advection.py
@@ -44,15 +44,16 @@ class DirectionalAdvectionKernel(KernelCodeGenerator):
                 ftype[0],nparticles,min_ghosts)
 
     @staticmethod
-    def min_ghosts(dx, max_velocity):
-        return math.ceil(max_velocity / dx)
+    def min_ghosts(max_dt, velocity_dx, max_velocity):
+        return int(math.ceil(max_dt*umax/float(velocity_dx)))
    
     def __init__(self, typegen, work_dim,  ftype,
                        is_cached, rk_scheme,
                        boundary, nparticles, 
                        min_ghosts=0,
                        symbolic_mode = False,
-                       known_vars = None):
+                       debug_mode    = False, 
+                       known_vars    = None):
         
         assert work_dim>0 and work_dim<=3
         assert nparticles in [1,2,4,8,16]
@@ -79,10 +80,10 @@ class DirectionalAdvectionKernel(KernelCodeGenerator):
         name = DirectionalAdvectionKernel.codegen_name(ftype,is_cached,rk_scheme,nparticles,min_ghosts)
 
         kernel_reqs = self.build_requirements(typegen, work_dim, itype, ftype, is_cached, 
-                rk_scheme, boundary, nparticles, symbolic_mode, storage, is_periodic)
+                rk_scheme, boundary, nparticles, symbolic_mode, storage, is_periodic, known_vars)
 
         kernel_args = self.gen_kernel_arguments(typegen, work_dim, itype, ftype, kernel_reqs, is_cached,
-                local_size_known)
+                local_size_known, debug_mode)
         
         super(DirectionalAdvectionKernel,self).__init__(
                 name=name,
@@ -127,9 +128,11 @@ class DirectionalAdvectionKernel(KernelCodeGenerator):
     #return global_work_size from effective work_size without
     # taking into account local_work_size alignment
     @staticmethod
-    def get_max_global_size(work_size, work_load):
+    def get_max_global_size(work_size, work_load, nparticles):
         work_size       = np.asarray(work_size)
         work_load       = np.asarray(work_load)
+
+        assert work_load[0] == 1
         
         global_size = work_size.copy()
         global_size = ((global_size+work_load-1)/work_load)
@@ -137,17 +140,28 @@ class DirectionalAdvectionKernel(KernelCodeGenerator):
 
     #return global_work_size from effective work_size and given local_work_size
     # global_work_size will be a multiple of local_work_size
-    def get_global_size(self, work_size, local_work_size, work_load=[1,1,1]):
+    def get_global_size(self, work_size, local_work_size, work_load=None):
+        work_dim        = self.work_dim
+        work_load       = [1]*work_dim if (work_load is None) else work_load
+
         work_size       = np.asarray(work_size)
         work_load       = np.asarray(work_load)
         local_work_size = np.asarray(local_work_size)
+
+        nparticles = self.nparticles
         
-        assert (local_work_size[1] == 1) and (local_work_size[2]==1)
+        for var in [work_size, work_load, local_work_size]:
+            msg = 'Dimension mismatch for variable {}, expected work_dim is {}.' 
+            msg = msg.format(var, work_dim)
+            if var.size != work_dim:
+                raise ValueError(msg)
+        for i in xrange(1,work_dim):
+            assert (local_work_size[i] == 1), 'local_work_size error!'
 
         if 'local_size' in self.known_vars:
-            assert (self.known_vars['local_size'] == local_work_size).all()
+            assert (self.known_vars['local_size'] == local_work_size).all(), 'local_work_size mismatch!'
 
-        max_global_size = self.get_max_global_size(work_size, work_load)
+        max_global_size = self.get_max_global_size(work_size, work_load, nparticles)
         global_size = ((max_global_size+local_work_size-1)/local_work_size) * local_work_size
 
         return global_size
@@ -178,20 +192,15 @@ class DirectionalAdvectionKernel(KernelCodeGenerator):
         return (sc,dc,tc)
 
     def build_requirements(self,typegen,work_dim,itype,ftype,is_cached,rk_scheme,
-            boundary,nparticles,force_symbolic,storage,is_periodic):
+            boundary,nparticles,force_symbolic,storage,is_periodic,known_vars):
         tg=typegen 
         reqs = WriteOnceDict()
         
-        # compute_id = ComputeIndexFunction(typegen=typegen, dim=work_dim, itype='int', 
-                # wrap=is_periodic)
-        # reqs['compute_id'] = compute_id
-
         mesh_base_struct = MeshBaseStruct(typegen=typegen, typedef='MeshBase_s')
         reqs['MeshBaseStruct'] = mesh_base_struct
 
         mesh_info_struct = MeshInfoStruct(typegen=typegen, typedef='MeshInfo_s')
         reqs['MeshInfoStruct'] = mesh_info_struct
-        
          
         advection_rhs = DirectionalAdvectionRhsFunction(typegen=typegen, work_dim=work_dim, 
                 ftype=ftype, is_cached=is_cached,
@@ -212,7 +221,7 @@ class DirectionalAdvectionKernel(KernelCodeGenerator):
         return reqs
     
     def gen_kernel_arguments(self, typegen, work_dim, itype, ftype, requirements,is_cached,
-            local_size_known):
+            local_size_known, debug_mode):
         
         kargs = ArgDict()
         kargs['dt'] = CodegenVariable(ctype=ftype,name='dt',typegen=typegen,
@@ -225,12 +234,18 @@ class DirectionalAdvectionKernel(KernelCodeGenerator):
                     restrict=True,ptr=True,const=True,add_impl_const=True)
         kargs['position'] = CodegenVariable(storage=_global,name='position',ctype=ftype,typegen=typegen,
                     restrict=True,ptr=True,const=False,add_impl_const=True)
+        
+        if debug_mode:
+            kargs['dbg0'] = CodegenVariable(storage=_global,name='dbg0',ctype=itype,typegen=typegen,
+                        restrict=True,ptr=True,const=False,add_impl_const=True)
+            kargs['dbg1'] = CodegenVariable(storage=_global,name='dbg1',ctype=itype,typegen=typegen,
+                        restrict=True,ptr=True,const=False,add_impl_const=True)
 
         kargs['mesh_info'] = requirements['MeshInfoStruct'].build_codegen_variable(const=True,name='mesh_info')
 
         if is_cached and not local_size_known:
-             kargs['buffer'] = CodegenVariable(storage=_local,ctype=ftype, add_impl_const=True,
-                     name='buffer', ptr=True, restrict=True, typegen=typegen, nl=False)
+             kargs['Vc'] = CodegenVariable(storage=_local,ctype=ftype, add_impl_const=True,
+                     name='Vc', ptr=True, restrict=True, typegen=typegen, nl=False)
 
         return kargs
     
@@ -268,8 +283,6 @@ class DirectionalAdvectionKernel(KernelCodeGenerator):
         
         velocity = s.vars['velocity']
         position = s.vars['position']
-        if is_cached:
-            shared = s.vars['buffer']
         
         grid_size           = mesh_info['local_mesh']['resolution'].view(
                 'grid_size', slice(None,work_dim))
@@ -291,6 +304,16 @@ class DirectionalAdvectionKernel(KernelCodeGenerator):
         pid        = CodegenVectorClBuiltin('pid',     itype, nparticles, typegen=tg)
         poffset    = CodegenVectorClBuiltin('poffset', itype, nparticles, typegen=tg)
         
+        if is_cached:
+            cache_ghosts = CodegenVariable('cache_ghosts', itype, typegen=tg, value=min_ghosts)
+            cache_width  = CodegenVariable('cache_width', itype, typegen=tg)
+            if local_size_known:
+                local_size = s.known_vars['local_size']
+                Vc = CodegenArray(name='Vc',dim=1,ctype=ftype,typegen=tg,
+                            shape=(local_size.value[0]+2*min_ghosts,), storage='__local')
+            else:
+                Vc = s.vars['Vc']
+        
         part_ftype = X.ctype
         part_itype = pid.ctype
         
@@ -377,13 +400,15 @@ class DirectionalAdvectionKernel(KernelCodeGenerator):
                 s.append(code)
                 
                 s.append('{} += {};'.format(X(), xmin()))
-            
-                if nparticles>1:
-                    code = 'vstore{}({}, {}, {}+{});'.format(nparticles,X(), 
-                            global_id[0], position(), line_index())
-                else:
-                    code = '{} = {};'.format(position['{}+{}'.format(line_index(),pid())],X())
-                s.append(code)
+                
+                with s._if_(active()):
+                    if nparticles>1:
+                        code = 'vstore{}({}, {}, {}+{});'.format(nparticles,X(), 
+                                global_id[0], position(), line_index())
+                        s.append(code)
+                    else:
+                        code = '{} = {};'.format(position['{}+{}'.format(line_index(),pid())],X())
+                        s.append(code)
 
 
 
-- 
GitLab