advection 2d debug

2315a1db · Jean-Baptiste Keck · 2c816085 · 2315a1db · 2315a1db · 2315a1db
Commit 2315a1db authored 8 years ago by Jean-Baptiste Keck
--- a/examples/advection_gpu.py
+++ b/examples/advection_gpu.py
@@ -139,21 +139,26 @@ if __name__=='__main__':
    autotuner_config = AutotunerConfig(autotuner_flag=AutotunerFlags.PATIENT, prune_threshold=1.20, override_cache=False)
+    rk_scheme = ExplicitRungeKutta('Euler')
    method = {
            Backend: Backend.OPENCL,
-            TimeIntegrator: RK2,
+            TimeIntegrator: rk_scheme,
            Remesh: L2_1,
            ExtraArgs: {
                'autotuner_config':autotuner_config,
                'use_builtin_copy':True,
-                'stretching': {
-                    'rk_scheme': ExplicitRungeKutta('Euler'),
-                    'formulation': StretchingFormulation.CONSERVATIVE,
-                    'boundary': BoundaryCondition.PERIODIC, 
-                    'order':4
-                }
            }
        }
+    if DIM==3:
+        method[ExtraArgs]['stretching'] = \
+            {
+                'rk_scheme': rk_scheme,
+                'formulation': StretchingFormulation.CONSERVATIVE,
+                'boundary': BoundaryCondition.PERIODIC, 
+                'order':4
+            }
    advected_fields = [vorti]+scalars
    variables = dict(zip(advected_fields, [f_topo]*len(advected_fields)))

--- a/hysop/codegen/kernels/directional_advection.py
+++ b/hysop/codegen/kernels/directional_advection.py
@@ -173,7 +173,7 @@ class DirectionalAdvectionKernel(KernelCodeGenerator):
    #return a tuple of required (static,dynamic,total) cache bytes per workgroup
    def required_workgroup_cache_size(self, local_work_size):
-        work_dim             = self.work_dim
+        work_dim        = self.work_dim
        ftype           = self.ftype
        is_cached       = self.is_cached
        flt_bytes       = self.typegen.FLT_BYTES[ftype]
@@ -184,7 +184,7 @@ class DirectionalAdvectionKernel(KernelCodeGenerator):
        if is_cached: 
            count = self.nparticles*local_work_size[0]+2*self.min_ghosts
            if 'local_size' in self.known_vars:
-                assert (self.known_vars['local_size'] == local_work_size).all()
+                assert (self.known_vars['local_size'] == local_work_size[:work_dim]).all()
                sc += count
            else:
                dc += count
@@ -664,7 +664,8 @@ class DirectionalAdvectionKernel(KernelCodeGenerator):
            kernel_launcher = KernelLauncher(kernel, queue, list(gwi), list(lwi))
            total_work = work_size[0]*work_size[1]*work_size[2]
-            per_work_statistics = codegen.per_work_statistics()
+            # per_work_statistics = codegen.per_work_statistics()
+            per_work_statistics = None
            cache_info = codegen.required_workgroup_cache_size(lwi)

--- a/hysop/gpu/gpu_particle_advection_dir.py
+++ b/hysop/gpu/gpu_particle_advection_dir.py
@@ -18,7 +18,7 @@ from hysop.codegen.structs.mesh_info import MeshInfoStruct, MeshState, MeshDir
 from hysop.methods_keys import TimeIntegrator, Remesh, ExtraArgs, \
        DeviceSupport, Splitting, MultiScale, Interpolation, Precision, \
-        StretchingFormulation, DirectionalSplitting, Backend
+        StretchingFormulation, DirectionalSplitting, Backend, ExplicitRungeKutta
 from hysop.numerics.odesolvers    import Euler, RK2, RK3, RK4
 from hysop.numerics.interpolation import Linear
@@ -30,6 +30,12 @@ from hysop.numerics.remeshing import L8_4
 from hysop.constants import np
+class InstanceOf(object):
+    """Marker wrapping a class so a list of valid values can also express
+    "any instance of this class".
+
+    In this commit it is used in ``_valid_method_values`` as
+    ``InstanceOf(ExplicitRungeKutta)`` next to the plain integrator values.
+    """
+    def __init__(self, cls):
+        # cls: the class (or tuple of classes) later handed to isinstance().
+        self.cls = cls
+    def match_instance(self, obj):
+        """Return True if ``obj`` is an instance of the wrapped class
+        (subclasses included, as with the builtin ``isinstance``)."""
+        return isinstance(obj,self.cls)
 class GPUParticleAdvectionDir(ParticleAdvectionDir, GPUOperator):
    """
@@ -44,7 +50,7 @@ class GPUParticleAdvectionDir(ParticleAdvectionDir, GPUOperator):
            Interpolation:  Linear,
            Backend:        Backend.OPENCL,
            DeviceSupport:  DeviceSupport.DEVICE_GPU,
-            Splitting:      DirectionalSplitting.STRANG_SECOND_ORDER,
+            Splitting:      DirectionalSplitting.STRANG_FIRST_ORDER,
            Remesh:         L2_1, 
            MultiScale:     L2_1,
            Precision:      HYSOP_REAL,
@@ -63,7 +69,7 @@ class GPUParticleAdvectionDir(ParticleAdvectionDir, GPUOperator):
        }
    _valid_method_values = {
-        TimeIntegrator: [Euler,RK2,RK3,RK4], 
+        TimeIntegrator: [Euler,RK2,RK3,RK4,InstanceOf(ExplicitRungeKutta)], 
        Interpolation:  [Linear],
        Remesh:         [L2_1,L2_2,L2_3,L2_4,L4_2,L4_3,L4_4,L6_3,L6_4,L6_5,L6_6,L8_4], 
        DeviceSupport:  [DeviceSupport.DEVICE_GPU], 
@@ -91,10 +97,14 @@ class GPUParticleAdvectionDir(ParticleAdvectionDir, GPUOperator):
                        msg="WARNING: Unknown extra arg '{}', ' \
                            + 'valid values are:\n\t{}".format(ke,vke)
                        print msg
-            elif (valid_method_values[k] is not None) and \
+            elif (valid_method_values[k] is not None):
-                    (user_method[k] not in valid_method_values[k]):
+                valid_vals = [v for v in valid_method_values[k]     if not isinstance(v,InstanceOf)]
-                msg = "Unknown method value '{}', valid values for key '{}' are:\n\t{}"
+                valid_cls  = [v.cls for v in valid_method_values[k] if     isinstance(v,InstanceOf)]
-                raise ValueError(msg.format(user_method[k],k,valid_method_values[k]))
+                if (user_method[k] not in valid_vals \
+                        and user_method[k].__class__ not in valid_cls):
+                    pass
+                    msg = "Unknown method value '{}', valid values for key '{}' are:\n\t{}"
+                    raise ValueError(msg.format(user_method[k],k,valid_method_values[k]))
        method = default_method.copy()
        method.update(user_method)
@@ -270,21 +280,35 @@ class GPUParticleAdvectionDir(ParticleAdvectionDir, GPUOperator):
        direction = self.direction
        smethod = self.method[Splitting]
-        if smethod==DirectionalSplitting.STRANG_FIRST_ORDER:
-            is_XY_needed = (dim>1) and (direction == MeshDir.Y)
+        is_XY_needed = False
-            is_YX_needed = (dim>1) and (direction == MeshDir.Z)
+        is_YX_needed = False
-            is_XZ_needed = (dim>2) and (direction == MeshDir.Z) \
+        is_XZ_needed = False
-                    and (not self._is_distributed)
+        is_ZX_needed = False
-            is_ZX_needed = (dim>2) and (direction == MeshDir.Z) \
-                    and (not self._is_distributed)
+        if dim==2:
-        elif smethod==DirectionalSplitting.STRANG_SECOND_ORDER:
+            if smethod==DirectionalSplitting.STRANG_FIRST_ORDER:
-            is_XY_needed = (dim>1) and (direction == MeshDir.Y)
+                is_XY_needed = (dim>1) and (direction == MeshDir.Y)
-            is_YX_needed = (dim>1) and (direction == MeshDir.X)
+                is_YX_needed = (dim>1) and (direction == MeshDir.Y)
+            elif smethod==DirectionalSplitting.STRANG_SECOND_ORDER:
-            is_XZ_needed = (dim>2) and (direction == MeshDir.Z) \
+                is_XY_needed = (dim>1) and (direction == MeshDir.Y)
-                    and (not self._is_distributed)
+                is_YX_needed = (dim>1) and (direction == MeshDir.X)
-            is_ZX_needed = (dim>2) and (direction == MeshDir.Y) \
+        elif dim==3:
-                    and (not self._is_distributed)
+            if smethod==DirectionalSplitting.STRANG_FIRST_ORDER:
+                is_XY_needed = (dim>1) and (direction == MeshDir.Y)
+                is_YX_needed = (dim>1) and (direction == MeshDir.Z)
+                is_XZ_needed = (dim>2) and (direction == MeshDir.Z) \
+                        and (not self._is_distributed)
+                is_ZX_needed = (dim>2) and (direction == MeshDir.Z) \
+                        and (not self._is_distributed)
+            elif smethod==DirectionalSplitting.STRANG_SECOND_ORDER:
+                is_XY_needed = (dim>1) and (direction == MeshDir.Y)
+                is_YX_needed = (dim>1) and (direction == MeshDir.X)
+                is_XZ_needed = (dim>2) and (direction == MeshDir.Z) \
+                        and (not self._is_distributed)
+                is_ZX_needed = (dim>2) and (direction == MeshDir.Y) \
+                        and (not self._is_distributed)
        if is_XY_needed:
            self._collect_transposition_kernel_xy()

--- a/hysop/gpu/kernel_autotuner.py
+++ b/hysop/gpu/kernel_autotuner.py
@@ -161,7 +161,8 @@ class KernelAutotuner(object):
    def _make_config_key(work_dim, typegen, build_opts):
        concat_unique_list = lambda L: '['+'_'.join([str(val) for val in frozenset(L)])+']'
        hasher = KernelAutotuner._hash_func()
-        hasher.update('{}_{}'.format(work_dim, concat_unique_list(build_opts)))
+        hasher.update('{}_{}_{}'.format(work_dim, concat_unique_list(build_opts),
+             typegen.__repr__()))
        return hasher.hexdigest() 
    def _update_configs(self):

--- a/hysop/gpu/static_gpu_particle_advection_dir.py
+++ b/hysop/gpu/static_gpu_particle_advection_dir.py
@@ -7,7 +7,7 @@ from hysop.gpu.gpu_particle_advection_dir import GPUParticleAdvectionDir, MeshDi
 from hysop.methods_keys import TimeIntegrator, Remesh, ExtraArgs, \
    Support, Splitting, MultiScale, Interpolation, Precision,\
-    StretchingFormulation
+    StretchingFormulation, BoundaryCondition
 from hysop.numerics.odesolvers    import Euler, RK2, RK3, RK4
 from hysop.numerics.interpolation import Linear
@@ -116,13 +116,6 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir):
        self._append_size_constants(
            [self.velocity_topo.ghosts()[self.direction]],
            prefix='V_GHOSTS_NB', suffix=[''])
-        enum = ['I', 'II', 'III']
-        self._append_size_constants(
-            self._reorder_vect(['NB' + d for d in S_DIR[:self.dim]]),
-            prefix='NB_', suffix=enum[:self.dim])
-        self._append_size_constants(
-            self._reorder_vect(['V_NB' + d for d in S_DIR[:self.dim]]),
-            prefix='V_NB_', suffix=enum[:self.dim])
        self._append_size_constants([self._is_multi_scale*1],
                prefix='ADVEC_IS_MULTISCALE', suffix=[''])
@@ -307,6 +300,14 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir):
        build_options += " -D WI_NB=" + str(WINb)
        build_options += " -D PART_NB_PER_WI="
        build_options += str(self.f_resol_dir[0] / WINb)
+        enum = ['I', 'II', 'III']
+        comp  = self._reorder_vect(['NB' + d for d in S_DIR[:self.dim]])
+        vcomp = self._reorder_vect(['V_NB' + d for d in S_DIR[:self.dim]])
+        for i,suffix in enumerate(enum[:self.dim]):
+            build_options += ' -D NB_{}={}'.format(suffix,comp[i])
+        for i,suffix in enumerate(enum[:self.dim]):
+            build_options += ' -D V_NB_{}={}'.format(suffix,vcomp[i])
        self._remesh = {}
        cnames = []
@@ -428,8 +429,8 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir):
        ftype              = self.cl_env.typegen.fbtype
        compute_resolution = self.fields_topo.mesh.compute_resolution
-        boundary           = self._stretching['boundary']
+        boundary           = BoundaryCondition.PERIODIC
-        rk_scheme          = self._stretching['rk_scheme']
+        rk_scheme          = self.method[TimeIntegrator]
        velocity    = self.velocity
        position    = self._particle_position
@@ -452,16 +453,16 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir):
                        build_opts=self._build_options,
                        autotuner_config=self._autotuner_config)
-        callback_profiler.register_tasks('advection', 
+        # callback_profiler.register_tasks('advection', 
-                ftype=ftype,
+                # ftype=ftype,
-                total_work=total_work, 
+                # total_work=total_work, 
-                per_work_statistic=per_work_statistic)
+                # per_work_statistic=per_work_statistic)
        def do_advec(dt,**kargs):
-            callback_profiler.tic('advection')
+            # callback_profiler.tic('advection')
            kernel_args[kernel_args_mapping['dt']] = self.cl_env.precision(dt)
            evt = kernel_launcher(*kernel_args)
-            callback_profiler.tac('advection',evt=evt)
+            # callback_profiler.tac('advection',evt=evt)
            return evt
        self._do_advec = do_advec