From a6c9db115e97ffaf59eb48389be342fb1f14c699 Mon Sep 17 00:00:00 2001
From: Jean-Baptiste Keck <jean-baptiste.keck@imag.fr>
Date: Sun, 1 Oct 2017 13:27:58 +0200
Subject: [PATCH] custom hashes for autotunable kernels

---
 hysop/__init__.py                                  |  2 +-
 hysop/backend/device/autotunable_kernel.py         | 48 ++++++++++++++++++-
 hysop/backend/device/kernel_autotuner.py           | 11 +++--
 .../autotunable_kernels/advection_dir.py           | 10 ++--
 .../opencl/autotunable_kernels/remesh_dir.py       | 10 ++--
 .../opencl/autotunable_kernels/transpose.py        |  4 +-
 .../opencl/opencl_autotunable_kernel.py            |  5 +-
 hysop/backend/device/opencl/opencl_env.py          | 14 +++---
 .../operator/directional/advection_dir.py          |  2 +-
 hysop/operator/base/redistribute_operator.py       |  4 +-
 10 files changed, 81 insertions(+), 29 deletions(-)

diff --git a/hysop/__init__.py b/hysop/__init__.py
index 279e86abe..d877538ec 100644
--- a/hysop/__init__.py
+++ b/hysop/__init__.py
@@ -20,7 +20,7 @@
 __VERBOSE__ = False
 __DEBUG__ = False
 __TRACE__ = False
 __TRACE_WARNINGS__ = False
-__KERNEL_DEBUG__ = True
+__KERNEL_DEBUG__ = False
 __PROFILE__ = True
 __ENABLE_LONG_TESTS__ = "OFF" is "ON"
diff --git a/hysop/backend/device/autotunable_kernel.py b/hysop/backend/device/autotunable_kernel.py
index 13403e47d..12916e206 100644
--- a/hysop/backend/device/autotunable_kernel.py
+++ b/hysop/backend/device/autotunable_kernel.py
@@ -21,9 +21,55 @@ class AutotunableKernel(object):
         self.dump_src = first_not_None(dump_src, autotuner_config.debug)
         self.symbolic_mode = first_not_None(symbolic_mode, autotuner_config.debug)
+
+    def custom_hash(self, *args, **kwds):
+        assert args or kwds, 'no arguments to be hashed.'
+        def _hash_arg(a):
+            if isinstance(a, list):
+                return hash(tuple(_hash_arg(x) for x in a))
+            elif isinstance(a, set):
+                return hash(tuple(_hash_arg(x) for x in a))
+            elif isinstance(a, dict):
+                return hash(tuple((_hash_arg(k), _hash_arg(v)) for (k,v) in a.items()))
+            elif isinstance(a, npw.ndarray):
+                assert a.ndim == 1
+                assert a.size < 17, 'Only parameters up to size 16 are allowed.'
+                return hash(tuple(a.tolist()))
+            else:
+                return hash(a)
+        def _hash_karg(k,v):
+            if k == 'mesh_info_vars':
+                # for mesh infos we just hash the code generated constants that
+                # may alter the code branching.
+                from hysop.backend.device.codegen.base.variables import CodegenStruct
+                check_instance(v, dict, keys=str, values=CodegenStruct)
+                mesh_infos = tuple(str(v[k]) for k in sorted(v.keys()))
+                h = hash(mesh_infos)
+                return h
+            else:
+                msg='Unknown custom hash key \'{}\'.'.format(k)
+                raise KeyError(msg)
+
+        h = None
+        if args:
+            h = _hash_arg(args[0])
+            for arg in args[1:]:
+                h ^= _hash_arg(arg)
+        if kwds:
+            items = kwds.items()
+            if h is None:
+                h = _hash_karg(*items[0])
+            else:
+                h ^= _hash_karg(*items[0])
+            for it in items[1:]:
+                h ^= _hash_karg(*it)
+        return h
+
     @abstractmethod
-    def autotune(self, name, kernel_args, **extra_kwds):
+    def autotune(self, name, kernel_args,
+            force_verbose=False, force_debug=False,
+            **extra_kwds):
         """Autotune this kernel with given name and extra_kwds."""
         pass
 
diff --git a/hysop/backend/device/kernel_autotuner.py b/hysop/backend/device/kernel_autotuner.py
index 58ba6ef68..4ce75bb7d 100644
--- a/hysop/backend/device/kernel_autotuner.py
+++ b/hysop/backend/device/kernel_autotuner.py
@@ -150,14 +150,16 @@ class KernelAutotuner(object):
         src_hash = hasher.hexdigest()
 
         if (kernel_name != cached_kernel_name):
-            msg='\nCached kernel name did not match the benched one:\n {}\n {}'
-            msg=msg.format(kernel_name, cached_kernel_name)
+            msg='\nCached kernel name did not match the benched one:\n {}\n {}\n'
+            msg+='\nThis might be due to a faulty implementation of {}.hash_extra_kwds().'
+            msg=msg.format(kernel_name, cached_kernel_name, type(tkernel).__name__)
             warnings.warn(msg, CodeGeneratorWarning)
             return None
 
         if (src_hash != cached_src_hash):
             msg='\nCached kernel source hash did not match the benched one.\n {}\n {}'
-            msg=msg.format(src_hash, cached_src_hash)
+            msg+='\nThis might be due to a faulty implementation of {}.hash_extra_kwds().'
+            msg=msg.format(src_hash, cached_src_hash, type(tkernel).__name__)
             warnings.warn(msg, CodeGeneratorWarning)
             return None
 
@@ -235,7 +237,8 @@ class KernelAutotuner(object):
                 (cache_src_hash, cache_stats) = results[run_key]
                 if cache_src_hash != src_hash:
                     msg='\nCached parameters candidate did not match the benched one.\n {}\n {}'
-                    msg=msg.format(src_hash, cache_src_hash)
+                    msg+='\nThis might be due to a faulty implementation of {}.hash_extra_kwds().'
+                    msg=msg.format(src_hash, cache_src_hash, type(tunable_kernel).__name__)
                     warnings.warn(msg, CodeGeneratorWarning)
                     old_stats = None
                 else:
diff --git a/hysop/backend/device/opencl/autotunable_kernels/advection_dir.py b/hysop/backend/device/opencl/autotunable_kernels/advection_dir.py
index 274e3fdf8..99e43a432 100644
--- a/hysop/backend/device/opencl/autotunable_kernels/advection_dir.py
+++ b/hysop/backend/device/opencl/autotunable_kernels/advection_dir.py
@@ -13,7 +13,7 @@ class OpenClAutotunableDirectionalAdvectionKernel(OpenClAutotunableKernel):
     """Autotunable interface for directional advection kernel code generators."""
 
     def autotune(self, direction, time_integrator, velocity_cfl,
-                       velocity, position, precision):
+                       velocity, position, precision, **kwds):
         """Autotune this kernel with specified configuration."""
 
         dim = velocity.dim
@@ -67,7 +67,7 @@ class OpenClAutotunableDirectionalAdvectionKernel(OpenClAutotunableKernel):
                 rk_scheme=time_integrator, kernel_args=kernel_args,
                 cache_ghosts=cache_ghosts, vboundaries=vboundaries,
                 precision=precision, ftype=ftype,
                 mesh_info_vars=mesh_info_vars, work_dim=dim,
-                work_size=position.compute_resolution)
+                work_size=position.compute_resolution, **kwds)
 
     def compute_args_mapping(self, extra_kwds, extra_parameters):
@@ -161,7 +161,7 @@ class OpenClAutotunableDirectionalAdvectionKernel(OpenClAutotunableKernel):
 
     def hash_extra_kwds(self, extra_kwds):
         """Hash extra_kwds dictionnary for caching purposes."""
         kwds = ('rk_scheme', 'ftype', 'work_dim',
-                'vboundaries', 'cache_ghosts')
-        return hash(tuple(extra_kwds[kwd] for kwd in kwds)) ^ \
-            hash(tuple(extra_kwds['work_size'].tolist()))
+                'vboundaries', 'cache_ghosts', 'work_size')
+        return self.custom_hash(*tuple(extra_kwds[kwd] for kwd in kwds),
+                mesh_info_vars=extra_kwds['mesh_info_vars'])
diff --git a/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py b/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py
index f0ea8a914..8e55ca03d 100644
--- a/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py
+++ b/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py
@@ -20,7 +20,7 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel):
     def autotune(self, precision, direction, scalar_cfl,
                        position, scalars_in, scalars_out, is_inplace,
                        remesh_kernel, remesh_criteria_eps,
-                       force_atomics, relax_min_particles):
+                       force_atomics, relax_min_particles, **kwds):
         """Autotune this kernel with specified configuration."""
         check_instance(scalars_in, tuple, values=CartesianDiscreteFieldView)
         check_instance(scalars_out, tuple, values=CartesianDiscreteFieldView)
@@ -161,7 +161,7 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel):
                 remesh_kernel=remesh_kernel, remesh_criteria_eps=remesh_criteria_eps,
                 force_atomics=force_atomics, min_nparticles=min_nparticles,
                 ftype=ftype, scalar_cfl=scalar_cfl, kernel_args=kernel_args,
                 mesh_info_vars=mesh_info_vars,
-                work_dim=work_dim, work_size=work_size, min_wg_size=min_wg_size)
+                work_dim=work_dim, work_size=work_size, min_wg_size=min_wg_size, **kwds)
 
     def compute_args_mapping(self, extra_kwds, extra_parameters):
@@ -339,7 +339,7 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel):
 
     def hash_extra_kwds(self, extra_kwds):
         """Hash extra_kwds dictionnary for caching purposes."""
         kwds = ('remesh_criteria_eps', 'nscalars', 'ftype',
-                'is_inplace', 'remesh_kernel')
-        return hash(tuple(extra_kwds[kwd] for kwd in kwds)) ^ \
-            hash(tuple(extra_kwds['work_size'].tolist()))
+                'is_inplace', 'remesh_kernel', 'work_size')
+        return self.custom_hash(*tuple(extra_kwds[kwd] for kwd in kwds),
+                mesh_info_vars=extra_kwds['mesh_info_vars'])
diff --git a/hysop/backend/device/opencl/autotunable_kernels/transpose.py b/hysop/backend/device/opencl/autotunable_kernels/transpose.py
index 14dea8a47..78d282253 100644
--- a/hysop/backend/device/opencl/autotunable_kernels/transpose.py
+++ b/hysop/backend/device/opencl/autotunable_kernels/transpose.py
@@ -31,7 +31,7 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel):
 
     def autotune(self, is_inplace,
                        input_field, output_field,
-                       axes, name=None):
+                       axes, name=None, **kwds):
         """Autotune this kernel with specified axes, inputs and outputs."""
 
         check_instance(axes, tuple, values=int)
@@ -89,7 +89,7 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel):
                 tile_indices=tile_indices,
                 work_dim=work_dim,
                 work_size=work_shape,
-                last_axe_permuted=last_axe_permuted)
+                last_axe_permuted=last_axe_permuted, **kwds)
 
     def compute_parameters(self, extra_kwds):
         """Register extra parameters to optimize."""
diff --git a/hysop/backend/device/opencl/opencl_autotunable_kernel.py b/hysop/backend/device/opencl/opencl_autotunable_kernel.py
index 56d1974c3..a261662ca 100644
--- a/hysop/backend/device/opencl/opencl_autotunable_kernel.py
+++ b/hysop/backend/device/opencl/opencl_autotunable_kernel.py
@@ -28,11 +28,12 @@ class OpenClAutotunableKernel(AutotunableKernel):
         self.cl_env = cl_env
         self.usable_cache_bytes_per_wg = clCharacterize.usable_local_mem_size(cl_env.device)
 
-    def autotune(self, name, **extra_kwds):
+    def autotune(self, name, force_verbose=False, force_debug=False, **extra_kwds):
         from hysop.backend.device.opencl.opencl_kernel_autotuner import OpenClKernelAutotuner
         autotuner = OpenClKernelAutotuner(name=name, tunable_kernel=self)
 
-        best_candidate_results = autotuner.autotune(extra_kwds=extra_kwds)
+        best_candidate_results = autotuner.autotune(extra_kwds=extra_kwds,
+                force_verbose=force_verbose, force_debug=force_debug)
         check_instance(best_candidate_results, dict)
 
         return self.format_best_candidate(name=name, extra_kwds=extra_kwds,
diff --git a/hysop/backend/device/opencl/opencl_env.py b/hysop/backend/device/opencl/opencl_env.py
index d9324eb3e..bd0921b08 100644
--- a/hysop/backend/device/opencl/opencl_env.py
+++ b/hysop/backend/device/opencl/opencl_env.py
@@ -587,6 +587,14 @@ Dumped OpenCL Kernel '{}'
             dump_folder=IO.default_path()+'/'+OPENCL_KERNEL_DUMP_FOLDER
             if not os.path.exists(dump_folder):
                 os.makedirs(dump_folder)
+
+        if DEBUG:
+            # dump kernel source while in debug mode
+            dump_file=dump_folder+'/'+'{}_dump.cl'.format(kernel_name)
+            print('Dumping kernel src at \'{}\'.'.format(dump_file))
+            with open(dump_file, 'w+') as f:
+                f.write(gpu_src)
+            #build_opts += ' '+' '.join(['-g', '-s "{}"'.format(dump_file)])
 
         # Build OpenCL program
         try:
@@ -607,12 +615,6 @@ Dumped OpenCL Kernel '{}'
                 build.get_build_info(self.device, cl.program_build_info.STATUS))
             vprint('Compiler log: ',
                 build.get_build_info(self.device, cl.program_build_info.LOG))
-            if DEBUG:
-                # dump kernel source while in debug mode
-                dump_file=dump_folder+'/'+'{}_dump.cl'.format(kernel_name)
-                print('Dumping kernel src at \'{}\'.'.format(dump_file))
-                with open(dump_file, 'w+') as f:
-                    f.write(gpu_src)
         if VERBOSE:
             print("===\n")
 
diff --git a/hysop/backend/device/opencl/operator/directional/advection_dir.py b/hysop/backend/device/opencl/operator/directional/advection_dir.py
index 854c1c784..4b9950563 100644
--- a/hysop/backend/device/opencl/operator/directional/advection_dir.py
+++ b/hysop/backend/device/opencl/operator/directional/advection_dir.py
@@ -107,7 +107,7 @@ class OpenClDirectionalAdvection(DirectionalAdvectionBase, OpenClDirectionalOper
         kwds['velocity_cfl'] = self.velocity_cfl
         kwds['time_integrator'] = self.time_integrator
 
-        (advec_kernel, args_dict) = kernel.autotune(**kwds)
+        (advec_kernel, args_dict) = kernel.autotune(force_debug=True, **kwds)
         args_dict.pop('dt')
 
         advec_launcher = advec_kernel.build_launcher(**args_dict)
diff --git a/hysop/operator/base/redistribute_operator.py b/hysop/operator/base/redistribute_operator.py
index 9bf6d91d8..4f6360bd6 100644
--- a/hysop/operator/base/redistribute_operator.py
+++ b/hysop/operator/base/redistribute_operator.py
@@ -69,12 +69,12 @@ class RedistributeOperatorBase(ComputationalGraphOperator):
 
         for field in self.input_vars:
             _, req = reqs.get_input_requirement(field)
-            req.transposition_states = None
+            req.axes = None
            req.basis = None
 
         for field in self.output_vars:
            _, req = reqs.get_output_requirement(field)
-            req.transposition_states = None
+            req.axes = None
            req.basis = None
 
        return reqs
--
GitLab
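
A minimal sketch of the hashing strategy that the new AutotunableKernel.custom_hash (diff above) relies on for hash_extra_kwds: each configuration value is reduced to a hashable form, and the per-value hashes are XOR-combined into a single autotuner cache key. The snippet uses plain numpy instead of hysop's npw wrapper; the helper name combined_hash and the example values are illustrative assumptions, not hysop API.

import numpy as np

def combined_hash(*args):
    """XOR-combine the hashes of heterogeneous kernel parameters (sketch)."""
    def _hash_one(a):
        if isinstance(a, (list, set)):
            # containers are flattened to tuples so they become hashable
            return hash(tuple(_hash_one(x) for x in a))
        elif isinstance(a, dict):
            return hash(tuple((_hash_one(k), _hash_one(v)) for (k, v) in a.items()))
        elif isinstance(a, np.ndarray):
            # only small 1D parameter arrays (e.g. a work size) are expected
            assert a.ndim == 1 and a.size < 17
            return hash(tuple(a.tolist()))
        else:
            return hash(a)
    assert args, 'no arguments to be hashed.'
    h = _hash_one(args[0])
    for a in args[1:]:
        h ^= _hash_one(a)   # XOR keeps each argument's contribution independent
    return h

# hypothetical usage: derive an autotuner cache key from a kernel configuration
key = combined_hash('euler', 'float', 3, np.asarray([64, 64, 64]))

The keyword path of custom_hash serves the same purpose for mesh_info_vars: it hashes the stringified code-generation constants, so two configurations that would generate identical kernel code (same branching constants, same work size) map to the same cache entry, while anything that changes the generated source invalidates it.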