From 1e799e6d026243e194bcb5ece1822ebb8c276121 Mon Sep 17 00:00:00 2001
From: Keck Jean-Baptiste <jean-baptiste.keck@imag.fr>
Date: Thu, 21 Sep 2017 19:47:50 +0200
Subject: [PATCH] fixed transposition, kernel launchers, opencl kernels

---
 .../taylor_green/taylor_green_monoscale.py    |  14 +-
 hysop/__init__.py                             |   2 +-
 .../codegen/kernels/tests/test_transpose.py   |   4 +-
 .../device/codegen/kernels/transpose.py       |   4 +-
 .../opencl/autotunable_kernels/transpose.py   |   2 +-
 .../backend/device/opencl/opencl_allocator.py |   2 +-
 .../device/opencl/opencl_array_backend.py     |   5 +-
 .../opencl/opencl_autotunable_kernel.py       |   4 +-
 hysop/backend/device/opencl/opencl_copy.py    |   1 +
 hysop/backend/device/opencl/opencl_kernel.py  |  13 +-
 .../device/opencl/opencl_kernel_launcher.py   |   7 +-
 .../backend/device/opencl/opencl_operator.py  | 330 +++++++++---------
 .../device/opencl/operator/transpose.py       |   7 +-
 hysop/core/arrays/array.py                    |   4 +-
 hysop/core/arrays/array_backend.py            |   3 +-
 hysop/core/memory/memory_request.py           |   5 +-
 hysop/fields/cartesian_discrete_field.py      |   3 +-
 hysop/operator/base/advection_dir.py          |   3 +-
 hysop/operator/base/transpose_operator.py     |   3 +-
 hysop/operator/tests/test_transpose.py        |  26 +-
 hysop/operator/transpose.py                   |   1 -
 hysop/tools/misc.py                           |  11 +-
 hysop/tools/types.py                          |   3 +-
 hysop/topology/cartesian_topology.py          |   8 +-
 24 files changed, 244 insertions(+), 221 deletions(-)

diff --git a/examples/taylor_green/taylor_green_monoscale.py b/examples/taylor_green/taylor_green_monoscale.py
index fcfb69787..5b248d6d4 100644
--- a/examples/taylor_green/taylor_green_monoscale.py
+++ b/examples/taylor_green/taylor_green_monoscale.py
@@ -48,12 +48,14 @@ def init_vorticity(data, coords, dim):
         data[1][...] = - sin(x) * cos(y) * sin(z)
         data[2][...] = 2. * sin(x) * sin(y) * cos(z)
     elif dim==2:
-        #data[0][...] = cos(x)*cos(y)
         (x,y) = coords
-        (nx,ny) = (3,3)
-        i = np.floor((nx*x)/(2*pi))
-        j = np.floor((ny*y)/(2*pi))
-        data[0][...] = (-1)**(i+j)
+        #COS-COS
+        data[0][...] = cos(x)*cos(y)   
+        #CHECKERBOARD
+        #(nx,ny) = (3,3)
+        #i = np.floor((nx*x)/(2*pi))
+        #j = np.floor((ny*y)/(2*pi))
+        #data[0][...] = (-1)**(i+j)
     else:
         raise NotImplementedError('dimension {}'.format(dim))
 
@@ -76,7 +78,7 @@ def run(npts=64+1, viscosity=1./1600., lcfl=0.125, cfl=0.5):
             advected_fields = (vorti,),
             velocity_cfl = cfl,
             variables = {velo: d3d, vorti: d3d},
-            method = {TimeIntegrator: RK4, Remesh: Remesh.L2_1},
+            method = {TimeIntegrator: Euler, Remesh: Remesh.L2_1},
         )
     if dim==3:
         stretch = DirectionalStretching(
diff --git a/hysop/__init__.py b/hysop/__init__.py
index ed9bf2cc3..3afa33583 100644
--- a/hysop/__init__.py
+++ b/hysop/__init__.py
@@ -16,7 +16,7 @@ __FFTW_ENABLED__   = "ON" is "ON"
 __SCALES_ENABLED__ = "ON" is "ON"
 __OPTIMIZE__       = not __debug__
 
-__VERBOSE__        = True
+__VERBOSE__        = False
 __DEBUG__          = False
 __TRACE__          = False
 __KERNEL_DEBUG__   = False
diff --git a/hysop/backend/device/codegen/kernels/tests/test_transpose.py b/hysop/backend/device/codegen/kernels/tests/test_transpose.py
index cbc15ec7b..16ae23bcd 100644
--- a/hysop/backend/device/codegen/kernels/tests/test_transpose.py
+++ b/hysop/backend/device/codegen/kernels/tests/test_transpose.py
@@ -3,7 +3,7 @@ import copy, math, sys, os, tempfile
 
 from hysop import __ENABLE_LONG_TESTS__
 from hysop.deps import np, it
-from hysop.tools.misc import upper_pow2_or_3
+from hysop.tools.misc import upper_pow2_or_3, prod
 from hysop.tools.types import check_instance
 from hysop.tools.numerics import is_integer
 from hysop.backend.device.opencl import cl, clTools
@@ -248,7 +248,7 @@ class TestTranspose(object):
             for i in dak.workload_indexes:
                 local_work_size[i] = 1
             max_work = device.max_work_group_size
-            while (np.prod(local_work_size) > max_work):
+            while (prod(local_work_size) > max_work):
                 for i in xrange(work_dim-1,-1,-1):
                     if local_work_size[i] > 1:
                         break
diff --git a/hysop/backend/device/codegen/kernels/transpose.py b/hysop/backend/device/codegen/kernels/transpose.py
index 893ebe6c5..83049193b 100644
--- a/hysop/backend/device/codegen/kernels/transpose.py
+++ b/hysop/backend/device/codegen/kernels/transpose.py
@@ -2,7 +2,7 @@ import operator
 import numpy as np
 from contextlib import contextmanager, nested
 
-from hysop.tools.misc import upper_pow2_or_3
+from hysop.tools.misc import upper_pow2_or_3, prod
 from hysop.tools.decorators import static_vars
 from hysop.tools.numpywrappers import npw
 from hysop.tools.types import check_instance
@@ -136,7 +136,7 @@ class TransposeKernelGenerator(KernelCodeGenerator):
         nbytes       = np.dtype(dtype).itemsize
 
         tile_shape = self.tile_shape
-        tile_bytes = np.prod(tile_shape) * nbytes
+        tile_bytes = prod(tile_shape) * nbytes
         
         if self.contiguous_permutation:
             count = tile_bytes
diff --git a/hysop/backend/device/opencl/autotunable_kernels/transpose.py b/hysop/backend/device/opencl/autotunable_kernels/transpose.py
index 19eb967bf..7c5e175e3 100644
--- a/hysop/backend/device/opencl/autotunable_kernels/transpose.py
+++ b/hysop/backend/device/opencl/autotunable_kernels/transpose.py
@@ -36,7 +36,7 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel):
 
         check_instance(axes, tuple, values=int)
         check_instance(is_inplace, bool)
-        self._check_cartesian_fields(input_field, output_field, 
+        self.check_cartesian_fields(input_field, output_field, 
                 check_res=False, check_size=True)
             
         dim   = input_field.domain.dim
diff --git a/hysop/backend/device/opencl/opencl_allocator.py b/hysop/backend/device/opencl/opencl_allocator.py
index 79da1e550..0dbdb782a 100644
--- a/hysop/backend/device/opencl/opencl_allocator.py
+++ b/hysop/backend/device/opencl/opencl_allocator.py
@@ -107,4 +107,4 @@ class OpenClImmediateAllocator(OpenClAllocator):
         if isinstance(self, MemoryPool):
             msg='allocator is already a memory pool.'
             raise RuntimeError(msg)
-        return OpenClMemoryPool(allocator=self, name=name, verbose=True, **kwds) 
+        return OpenClMemoryPool(allocator=self, name=name, verbose=None, **kwds) 
diff --git a/hysop/backend/device/opencl/opencl_array_backend.py b/hysop/backend/device/opencl/opencl_array_backend.py
index e65e3fc62..26d570a99 100644
--- a/hysop/backend/device/opencl/opencl_array_backend.py
+++ b/hysop/backend/device/opencl/opencl_array_backend.py
@@ -3,6 +3,7 @@ import warnings
 from hysop.deps import re, np, os
 from hysop.tools.types import check_instance, to_tuple
 from hysop.tools.hash import hash_id
+from hysop.tools.misc import prod
 from hysop.tools.numerics import is_complex, get_dtype, float_to_complex_dtype, \
                                  complex_to_float_dtype, find_common_dtype
 from hysop.constants import Backend
@@ -1366,8 +1367,8 @@ class OpenClArrayBackend(ArrayBackend):
             if min_alignment < self.allocator.device.mem_base_addr_align:
                 alignment=1
                 dtype = np.dtype(dtype)
-                size = int(np.prod(shape)*dtype.itemsize)
-                # np.prod( shape=(,) ) will return 1.0, so we cast to int for scalars
+                size = int(prod(shape)*dtype.itemsize)
+                # prod( shape=(,) ) will return 1.0, so we cast to int for scalars
             else:
                 (size,nbytes,alignment) = self.get_alignment_and_size(shape=shape,
                         dtype=dtype, min_alignment=min_alignment)
diff --git a/hysop/backend/device/opencl/opencl_autotunable_kernel.py b/hysop/backend/device/opencl/opencl_autotunable_kernel.py
index 952ceb7e1..67c50ce87 100644
--- a/hysop/backend/device/opencl/opencl_autotunable_kernel.py
+++ b/hysop/backend/device/opencl/opencl_autotunable_kernel.py
@@ -21,7 +21,7 @@ class OpenClAutotunableKernel(AutotunableKernel):
                         autotuner_config=autotuner_config,
                         build_opts=build_opts, **kwds)
 
-        self._check_cl_env(cl_env)
+        self.check_cl_env(cl_env)
 
         self.cl_env = cl_env
         self.usable_cache_bytes_per_wg = clCharacterize.usable_local_mem_size(cl_env.device)
@@ -126,7 +126,7 @@ class OpenClAutotunableKernel(AutotunableKernel):
         """
         return self.cl_env.device.max_work_item_sizes
 
-    def check_cache(required_cache_size):
+    def check_cache(self, required_cache_size):
         """Check that required_cache_size bytes can fit in workgroup cache."""
         usable_cache_bytes_per_wg = self.usable_cache_bytes_per_wg
         if (required_cache_size > usable_cache_bytes_per_wg):
diff --git a/hysop/backend/device/opencl/opencl_copy.py b/hysop/backend/device/opencl/opencl_copy.py
index 7feceb58a..b75dd13ef 100644
--- a/hysop/backend/device/opencl/opencl_copy.py
+++ b/hysop/backend/device/opencl/opencl_copy.py
@@ -48,6 +48,7 @@ class OpenClCopyKernelLauncher(OpenClKernelLauncher):
         queue = first_not_None(queue, self._default_queue)
         check_instance(queue, cl.CommandQueue)
         evt = cl.enqueue_copy(queue=queue, **self._enqueue_copy_kwds)
+        return evt
 
     def global_size_configured(self):
         return True
diff --git a/hysop/backend/device/opencl/opencl_kernel.py b/hysop/backend/device/opencl/opencl_kernel.py
index aa89b36ac..beb71d8c4 100644
--- a/hysop/backend/device/opencl/opencl_kernel.py
+++ b/hysop/backend/device/opencl/opencl_kernel.py
@@ -58,8 +58,8 @@ class OpenClKernel(object):
         self._default_local_work_size = default_local_work_size
         self._kid = 0
         
-        assert len(program.all_kernels)==1
-        self._kernel = program.all_kernels[0]
+        assert len(program.all_kernels())==1
+        self._kernel = program.all_kernels()[0]
         
         default_args = default_args or {}
         self.default_args = default_args
@@ -89,7 +89,6 @@ class OpenClKernel(object):
     def _set_default_args(self, default_args):
         """Set default arguments."""
         args_mapping = self._args_mapping
-        default_args = self._default_args
         nargs = len(args_mapping)
         positions = () 
         for argname in default_args:
@@ -154,18 +153,18 @@ class OpenClKernel(object):
                             args_list=args_list, parameters_map=parameters_map,
                             default_global_work_size=global_work_size,
                             default_local_work_size=local_work_size,
-                            default_queue=default_queue)
+                            default_queue=queue)
         else:
             klauncher = OpenClKernelLauncher(name=name, kernel=self._program, 
                             args_list=args_list,
                             default_global_work_size=global_work_size,
                             default_local_work_size=local_work_size,
-                            default_queue=default_queue)
+                            default_queue=queue)
 
         self._kid += 1
         return klauncher
 
-    def _compute_args_list(**kwds):
+    def _compute_args_list(self, **kwds):
         """
         Compute argument list from default arguments and input keywords.
         If all arguments are not specified, also return a parameter_map 
@@ -182,7 +181,7 @@ class OpenClKernel(object):
         args_per_index = (len(parameters_map)>0) 
         
         args_list = ()
-        for (arg_name, arg_value) in arguments:
+        for (arg_name, arg_value) in arguments.iteritems():
             if (arg_name not in args_mapping):
                 msg='Unknown argument {}, valid ones are {}.'
                 msg=msg.format(arg_name, ', '.join(args_mapping.keys()))
diff --git a/hysop/backend/device/opencl/opencl_kernel_launcher.py b/hysop/backend/device/opencl/opencl_kernel_launcher.py
index 6e43f5bb7..c744d101c 100644
--- a/hysop/backend/device/opencl/opencl_kernel_launcher.py
+++ b/hysop/backend/device/opencl/opencl_kernel_launcher.py
@@ -100,6 +100,11 @@ class OpenClKernelListLauncher(object):
                 raise TypeError(msg)
         return self
 
+    def __iadd__(self, kernel):
+        """Push a kernel into the list."""
+        self.push_kernels(kernel)
+        return self
+
     def __call__(self, queue, wait_for=None, **kwds):
         """
         Enqueue all kernels on the given queue in order.
@@ -347,7 +352,7 @@ class OpenClKernelLauncher(object):
        
         dprint(self._apply_msg.format(global_work_size, local_work_size))
         
-        kernel = self.set_kernel_args(**kwds)
+        kernel = self._set_kernel_args(**kwds)
         
         evt = cl.enqueue_nd_range_kernel(queue=queue, kernel=kernel, 
                 global_work_size=global_work_size, 
diff --git a/hysop/backend/device/opencl/opencl_operator.py b/hysop/backend/device/opencl/opencl_operator.py
index 2e21e228a..569cfa878 100644
--- a/hysop/backend/device/opencl/opencl_operator.py
+++ b/hysop/backend/device/opencl/opencl_operator.py
@@ -35,190 +35,190 @@ class OpenClOperator(ComputationalGraphOperator):
     
     @classmethod
     def default_method(cls):
-        dm = super(DirectionalAdvectionBase, cls).default_method()
+        dm = super(OpenClOperator, cls).default_method()
         dm.update(cls.__default_method)
         return dm
 
     @classmethod
     def available_methods(cls):
-        am = super(DirectionalAdvectionBase, cls).available_methods()
+        am = super(OpenClOperator, cls).available_methods()
         am.update(cls.__available_methods)
         return am
 
-@debug
-def __init__(self, cl_env=None, mpi_params=None, **kwds):
-    """
-    Create the common attributes of all OpenCL operators.
-    See handle_method() and setup().
-
-    All input and output variable topologies should be of kind
-    Backend.OPENCL and share the same OpenClEnvironment.
-
-    Attributes
-    ----------
-    cl_env: OpenClEnvironment
-        OpenCL environment shared accross all topologies.
-
-    Notes
-    -----
-    About method keys:
-        OpenClKernelConfig: user build options, defines, precision 
-                            and autotuner configuration
-    """
-    check_instance(cl_env, OpenClEnvironment, allow_none=True)
-    check_instance(mpi_params, MPIParams, allow_none=True)
-
-    msg='mpi_params was {} and cl_env was {}.'
-    if (mpi_params is None):
-        if (cl_env is None):
-            _vars = kwds.get('input_vars', None) or kwds.get('output_vars', None)
-            assert (_vars is not None), 'No input or output variables.'
-            domain = _vars.keys()[0].domain
-            mpi_params = MPIParams(comm=domain.task_comm,
-                                   task_id=domain.current_task())
-            cl_env     = get_or_create_opencl_env(mpi_params)
-            msg=msg.format('None', 'None')
-        else:
-            mpi_params = cl_env.mpi_params
-            msg=msg.format('None', 'given')
-    else:
-        if (cl_env is None):
-            cl_env = get_or_create_opencl_env(mpi_params)
-            msg=msg.format('given', 'None')
+    @debug
+    def __init__(self, cl_env=None, mpi_params=None, **kwds):
+        """
+        Create the common attributes of all OpenCL operators.
+        See handle_method() and setup().
+
+        All input and output variable topologies should be of kind
+        Backend.OPENCL and share the same OpenClEnvironment.
+
+        Attributes
+        ----------
+        cl_env: OpenClEnvironment
+            OpenCL environment shared accross all topologies.
+
+        Notes
+        -----
+        About method keys:
+            OpenClKernelConfig: user build options, defines, precision 
+                                and autotuner configuration
+        """
+        check_instance(cl_env, OpenClEnvironment, allow_none=True)
+        check_instance(mpi_params, MPIParams, allow_none=True)
+
+        msg='mpi_params was {} and cl_env was {}.'
+        if (mpi_params is None):
+            if (cl_env is None):
+                _vars = kwds.get('input_vars', None) or kwds.get('output_vars', None)
+                assert (_vars is not None), 'No input or output variables.'
+                domain = _vars.keys()[0].domain
+                mpi_params = MPIParams(comm=domain.task_comm,
+                                       task_id=domain.current_task())
+                cl_env     = get_or_create_opencl_env(mpi_params)
+                msg=msg.format('None', 'None')
+            else:
+                mpi_params = cl_env.mpi_params
+                msg=msg.format('None', 'given')
         else:
-            msg=msg.format('given', 'given')
-            pass
-    
-    super(OpenClOperator, self).__init__(mpi_params=mpi_params, **kwds)
-    self.cl_env = cl_env
-    
-    if (cl_env.mpi_params is not self.mpi_params):
-        msg0='MPI Communicators do not match between OpenClEnvironment and MPIParams.'
-        msg0+='\n  => {}'.format(msg)
-        raise RuntimeError(msg0)
+            if (cl_env is None):
+                cl_env = get_or_create_opencl_env(mpi_params)
+                msg=msg.format('given', 'None')
+            else:
+                msg=msg.format('given', 'given')
+                pass
+        
+        super(OpenClOperator, self).__init__(mpi_params=mpi_params, **kwds)
+        self.cl_env = cl_env
+        
+        if (cl_env.mpi_params is not self.mpi_params):
+            msg0='MPI Communicators do not match between OpenClEnvironment and MPIParams.'
+            msg0+='\n  => {}'.format(msg)
+            raise RuntimeError(msg0)
 
 
-def supported_backends(self):
-    """
-    Return the backends that this operator's topologies can support.
-    """
-    return set([Backend.OPENCL])
+    def supported_backends(self):
+        """
+        Return the backends that this operator's topologies can support.
+        """
+        return set([Backend.OPENCL])
 
-@debug
-def handle_method(self, method):
-    """
-    Extract device configuration and precision from OpenClKernelConfig.
-    """
-    super(OpenClOperator,self).handle_method(method)
-    
-    assert OpenClKernelConfig in method
+    @debug
+    def handle_method(self, method):
+        """
+        Extract device configuration and precision from OpenClKernelConfig.
+        """
+        super(OpenClOperator,self).handle_method(method)
         
-    kernel_config = method.pop(OpenClKernelConfig)
-    autotuner_config = kernel_config.autotuner_config
-    
-    precision = kernel_config.precision
-
-    if precision == Precision.SAME:
-        precision = self.cl_env.precision
-    elif precision in [Precision.LONG_DOUBLE, Precision.QUAD]:
-        msg='Precision {} is not supported for OpenCl environment.'
-        msg=msg.format(precision)
-        raise ValueError(msg)
-    else:
-        from hysop.backend.device.opencl.opencl_tools import convert_precision
-        precision = convert_precision(precision)
-    
-    self.precision = precision
-    self.autotuner_config = autotuner_config
-    
-    self._initialize_cl_build_options(kernel_config.user_build_options)
-    self._initialize_cl_size_constants(kernel_config.user_size_constants)
-    
-def check(self):
-    super(OpenClOperator, self).check()
-    self._check_cl_env()
+        assert OpenClKernelConfig in method
+            
+        kernel_config = method.pop(OpenClKernelConfig)
+        autotuner_config = kernel_config.autotuner_config
+        
+        precision = kernel_config.precision
+
+        if precision == Precision.SAME:
+            precision = self.cl_env.precision
+        elif precision in [Precision.LONG_DOUBLE, Precision.QUAD]:
+            msg='Precision {} is not supported for OpenCl environment.'
+            msg=msg.format(precision)
+            raise ValueError(msg)
+        else:
+            from hysop.backend.device.opencl.opencl_tools import convert_precision
+            precision = convert_precision(precision)
+        
+        self.precision = precision
+        self.autotuner_config = autotuner_config
+        
+        self._initialize_cl_build_options(kernel_config.user_build_options)
+        self._initialize_cl_size_constants(kernel_config.user_size_constants)
+        
+    def check(self):
+        super(OpenClOperator, self).check()
+        self._check_cl_env()
 
-@debug
-def get_field_requirements(self):
-    """
-    called just after handle_method(), ie self.method has been set.
-    topology requirements are:
-        1) min and max ghosts for each input and output variables
-        2) allowed splitting directions for cartesian topologies
-        3) required local and global transposition state, if any. 
-        and more
-    they are stored in self.input_field_requirements and
-    self.output_field_requirements.
-
-    keys are continuous fields and values are of type
-    hysop.fields.field_requirement.discretefieldrequirements
-
-    default is backend.opencl, no min or max ghosts, basis.cartesian and no specific
-    transposition state for each input and output variables.
-    """
+    @debug
+    def get_field_requirements(self):
+        """
+        called just after handle_method(), ie self.method has been set.
+        topology requirements are:
+            1) min and max ghosts for each input and output variables
+            2) allowed splitting directions for cartesian topologies
+            3) required local and global transposition state, if any. 
+            and more
+        they are stored in self.input_field_requirements and
+        self.output_field_requirements.
+
+        keys are continuous fields and values are of type
+        hysop.fields.field_requirement.discretefieldrequirements
+
+        default is backend.opencl, no min or max ghosts, basis.cartesian and no specific
+        transposition state for each input and output variables.
+        """
 
-    # by default we create OPENCL (gpu) TopologyDescriptors 
-    for field, topo_descriptor in self.input_vars.iteritems():
-        topo_descriptor = TopologyDescriptor.build_descriptor(
-                backend=Backend.OPENCL,
-                operator=self,
-                field=field,
-                handle=topo_descriptor,
-                cl_env=self.cl_env)
-        self.input_vars[field] = topo_descriptor
-
-    for field, topo_descriptor in self.output_vars.iteritems():
-        topo_descriptor = TopologyDescriptor.build_descriptor(
-                backend=Backend.OPENCL,
-                operator=self,
-                field=field,
-                handle=topo_descriptor,
-                cl_env=self.cl_env)
-        self.output_vars[field] = topo_descriptor
-
-    return super(OpenClOperator, self).get_field_requirements()
-
-def build_options(self):
-    """
-    Build and return opencl build option string from 
-    self._cl_build_options and self._cl_defines.
-    """
-    build_options = self._cl_build_options
-    defines = set()
-    for define,value in self._cl_defines.iteritems():
-        if (value is not None):
-            define = '{}={}'.format(define.strip(), value.strip())
-        else:
-            define = define.strip()
-        defines.update(define)
-    return ' '.join(build_options) + ' -D'.join(defines)
+        # by default we create OPENCL (gpu) TopologyDescriptors 
+        for field, topo_descriptor in self.input_vars.iteritems():
+            topo_descriptor = TopologyDescriptor.build_descriptor(
+                    backend=Backend.OPENCL,
+                    operator=self,
+                    field=field,
+                    handle=topo_descriptor,
+                    cl_env=self.cl_env)
+            self.input_vars[field] = topo_descriptor
+
+        for field, topo_descriptor in self.output_vars.iteritems():
+            topo_descriptor = TopologyDescriptor.build_descriptor(
+                    backend=Backend.OPENCL,
+                    operator=self,
+                    field=field,
+                    handle=topo_descriptor,
+                    cl_env=self.cl_env)
+            self.output_vars[field] = topo_descriptor
+
+        return super(OpenClOperator, self).get_field_requirements()
+
+    def build_options(self):
+        """
+        Build and return the OpenCL build option string: the options in
+        self._cl_build_options followed by one -D flag per self._cl_defines entry.
+        """
+        build_options = self._cl_build_options
+        defines = set()
+        for define,value in self._cl_defines.iteritems():
+            if (value is not None):
+                define = '{}={}'.format(define.strip(), value.strip())
+            else:
+                define = define.strip()
+            defines.add('-D'+define) # add(): set.update(str) would insert single characters
+        return ' '.join(list(build_options) + sorted(defines))
 
-@debug
-def _check_cl_env(self):
-    """
-    Check if all topologies are on OpenCL backend and check that all opencl environments 
-    match.
-    """
-    topo = (self.input_vars.values()+self.output_vars.values())[0]
-    assert isinstance(topo, Topology)
-    assert topo.backend.kind == Backend.OPENCL
-    ref_env = self.cl_env
-    
-    for topo in set(self.input_vars.values()+self.output_vars.values()):
+    @debug
+    def _check_cl_env(self):
+        """
+        Check if all topologies are on OpenCL backend and check that all opencl environments 
+        match.
+        """
+        topo = (self.input_vars.values()+self.output_vars.values())[0]
         assert isinstance(topo, Topology)
         assert topo.backend.kind == Backend.OPENCL
-        assert topo.cl_env == ref_env
+        ref_env = self.cl_env
+        
+        for topo in set(self.input_vars.values()+self.output_vars.values()):
+            assert isinstance(topo, Topology)
+            assert topo.backend.kind == Backend.OPENCL
+            assert topo.cl_env == ref_env
 
-@debug
-def _initialize_cl_build_options(self, user_options):
-    """
-    Initialize OpenCl build options.
-    """
-    check_instance(user_options, list)
-    build_options = set()
-    build_options.update(self.cl_env.default_build_opts)
-    build_options.update(user_options)
-    self._cl_build_options = build_options
+    @debug
+    def _initialize_cl_build_options(self, user_options):
+        """
+        Initialize OpenCl build options.
+        """
+        check_instance(user_options, list)
+        build_options = set()
+        build_options.update(self.cl_env.default_build_opts)
+        build_options.update(user_options)
+        self._cl_build_options = build_options
 
     @debug
     def _initialize_cl_size_constants(self, user_size_constants):
diff --git a/hysop/backend/device/opencl/operator/transpose.py b/hysop/backend/device/opencl/operator/transpose.py
index 1f89e509c..518d88423 100644
--- a/hysop/backend/device/opencl/operator/transpose.py
+++ b/hysop/backend/device/opencl/operator/transpose.py
@@ -7,9 +7,14 @@ from hysop.tools.decorators import debug
 from hysop.operator.base.transpose_operator import TransposeOperatorBase
 from hysop.backend.device.opencl.opencl_operator import OpenClOperator, op_apply
 from hysop.backend.device.opencl.autotunable_kernels.transpose import OpenClAutotunableTransposeKernel
+from hysop.backend.device.opencl.opencl_kernel_launcher import OpenClKernelListLauncher
 
 class OpenClTranspose(TransposeOperatorBase, OpenClOperator):
 
+    @debug
+    def __init__(self, **kwds):
+        super(OpenClTranspose, self).__init__(**kwds)
+
     @debug
     def discretize(self):
         super(OpenClTranspose,self).discretize()
@@ -38,7 +43,7 @@ class OpenClTranspose(TransposeOperatorBase, OpenClOperator):
         (transpose, compute_inplace) = kernel.autotune(axes=axes, 
                 is_inplace=is_inplace, input_field=input_field, output_field=output_field)
         
-        launcher = OpenClKernelListLauncher(name=kernel.name)
+        launcher = OpenClKernelListLauncher(name=transpose.name)
         for i in xrange(self.nb_components):
             if compute_inplace:
                 launcher += transpose.build_launcher(inout=input_field[i].data)
diff --git a/hysop/core/arrays/array.py b/hysop/core/arrays/array.py
index 3a5f850f8..d5fd4369e 100644
--- a/hysop/core/arrays/array.py
+++ b/hysop/core/arrays/array.py
@@ -3,9 +3,11 @@ import numpy as np
 from abc import ABCMeta, abstractmethod
 from hysop.constants  import MemoryOrdering
 from hysop.constants  import DirectionLabels, default_order
+from hysop.tools.misc import prod
 from hysop.tools.types import check_instance
 from hysop.tools.numpywrappers import slices_empty
 from hysop.tools.decorators import required_property, optional_property
+
     
 class Array(object):
     """
@@ -237,7 +239,7 @@ class Array(object):
         """
         Number of elements in the array.
         """
-        return np.prod(self.get_shape())
+        return prod(self.get_shape())
     
     def get_itemsize(self):
         """
diff --git a/hysop/core/arrays/array_backend.py b/hysop/core/arrays/array_backend.py
index eabbac65f..9f6c9d78b 100644
--- a/hysop/core/arrays/array_backend.py
+++ b/hysop/core/arrays/array_backend.py
@@ -4,6 +4,7 @@ from hysop.deps import np, sys
 from hysop.constants import default_order, MemoryOrdering
 from hysop.constants import HYSOP_REAL, HYSOP_COMPLEX
 from hysop.constants import HYSOP_INTEGER, HYSOP_INDEX, HYSOP_DIM, HYSOP_BOOL
+from hysop.tools.misc import prod
 from hysop.tools.types    import check_instance, to_tuple, to_list
 from hysop.tools.numerics import is_fp, is_complex, match_float_type, \
                                  match_complex_type, complex_to_float_dtype
@@ -93,7 +94,7 @@ class ArrayBackend(object):
         """
         bytes_per_elem  = dtype.itemsize
         min_alignment   = min_alignment or 1
-        min_alloc_bytes = np.prod(shape) * bytes_per_elem
+        min_alloc_bytes = prod(shape) * bytes_per_elem
 
         msg0='min_alignment is not a power of two, got {}.'
         msg1='bytes_per_elem is not a power of two, got {}.'
diff --git a/hysop/core/memory/memory_request.py b/hysop/core/memory/memory_request.py
index 3efc91348..2facdf55d 100644
--- a/hysop/core/memory/memory_request.py
+++ b/hysop/core/memory/memory_request.py
@@ -5,6 +5,7 @@ from hysop.deps import np, copy
 from hysop.tools.types import check_instance, first_not_None
 from hysop.tools.units import bytes2str
 from hysop.tools.numerics import get_dtype
+from hysop.tools.misc import prod
 from hysop.core.arrays.array_backend import ArrayBackend
 from hysop.constants import HYSOP_BOOL, Backend
 
@@ -78,9 +79,9 @@ class MemoryRequest(object):
             msg += '\nSpecify one either explicitely or through a numpy.dtype.'
             raise ValueError(msg)
         
-        if np.prod(shape) != size:
+        if prod(shape) != size:
             msg='Shape does not match size (size={}, prod(shape)={}).'
-            msg.format(size, np.prod(shape))
+            msg.format(size, prod(shape))
             raise ValueError(msg)
         if alignment <= 0:
             msg = 'Alignment should be positive (got {}).'.format(alignment)
diff --git a/hysop/fields/cartesian_discrete_field.py b/hysop/fields/cartesian_discrete_field.py
index 5f216f2ca..add6b4226 100644
--- a/hysop/fields/cartesian_discrete_field.py
+++ b/hysop/fields/cartesian_discrete_field.py
@@ -12,6 +12,7 @@ from hysop.constants import Backend
 from hysop.tools.decorators import debug
 from hysop.tools.types import check_instance, to_tuple
 from hysop.tools.numpywrappers import npw
+from hysop.tools.misc import prod
 from hysop.fields.discrete_field import DiscreteField, DiscreteFieldView
 from hysop.topology.cartesian_topology import CartesianTopologyState
 
@@ -219,7 +220,7 @@ class CartesianDiscreteFieldView(DiscreteFieldView):
     
     def _get_size(self):
         """Size of the underlying contiguous data arrays."""
-        return np.prod(self.shape, dtype=np.int32)
+        return prod(self.shape)
     def _get_shape(self):
         """Alias for resolution."""
         return self.mesh.local_resolution
diff --git a/hysop/operator/base/advection_dir.py b/hysop/operator/base/advection_dir.py
index 769705b5c..dc476007d 100644
--- a/hysop/operator/base/advection_dir.py
+++ b/hysop/operator/base/advection_dir.py
@@ -208,6 +208,7 @@ class DirectionalAdvectionBase(object):
         request, mesh = MemoryRequest.dfield_like(a=f, ghosts=0, nb_components=1)
         requests.push_mem_request('position', request)
         assert all(self.dadvected_fields_in.values()[0].shape  == mesh.local_resolution)
+        self.position_mesh = mesh
         return requests
 
     @debug
@@ -222,7 +223,7 @@ class DirectionalAdvectionBase(object):
         """
         if (work is None):
             raise ValueError('work is None.')
-        self.dposition, self.position_mesh = work.get_buffer(self, 'position')
+        self.dposition, = work.get_buffer(self, 'position')
     
     ## Backend methods
     # ComputationalNode
diff --git a/hysop/operator/base/transpose_operator.py b/hysop/operator/base/transpose_operator.py
index 22bc6bcac..46779dc5a 100644
--- a/hysop/operator/base/transpose_operator.py
+++ b/hysop/operator/base/transpose_operator.py
@@ -54,8 +54,7 @@ class TransposeOperatorBase(object):
         input_vars  = { input_field:  variables[input_field] }
         output_vars = { output_field: variables[output_field] }
         
-        print kwds
-        super(TransposeOperatorBase,self).__init__(input_vars=input_vars,
+        super(TransposeOperatorBase, self).__init__(input_vars=input_vars,
                 output_vars=output_vars, **kwds)
 
         self.input_field = input_field
diff --git a/hysop/operator/tests/test_transpose.py b/hysop/operator/tests/test_transpose.py
index 315cd25ee..bd78960ef 100644
--- a/hysop/operator/tests/test_transpose.py
+++ b/hysop/operator/tests/test_transpose.py
@@ -21,7 +21,7 @@ class TestTransposeOperator(object):
         IO.set_default_path('/tmp/hysop_tests/test_transpose')
         
         if enable_debug_mode:
-            cls.size_min = 3
+            cls.size_min = 2
             cls.size_max = 6
         else:
             cls.size_min = 2
@@ -195,9 +195,9 @@ class TestTransposeOperator(object):
                  np.uint8, np.uint16, np.uint32, np.uint64,
                  np.float32, np.float64]
         random.shuffle(types)
-        for i in xrange(5,17):
+        for i in xrange(5,9):
             self._test(dim=i, dtype=types[i%len(types)], is_inplace=False,
-                    size_min=2, size_max=4, naxes=1)
+                    size_min=3, size_max=4, naxes=1)
     
     def test_2d_int_inplace(self):
         self._test(dim=2, dtype=np.int32, is_inplace=True)
@@ -210,14 +210,20 @@ class TestTransposeOperator(object):
     def test_4d_int_inplace(self):
         self._test(dim=4, dtype=np.int32, is_inplace=True)
     def test_upper_dimensions_inplace(self):
-        raise NotImplementedError()
+        types = [np.int8, np.int16, np.int32, np.int64,
+                 np.uint8, np.uint16, np.uint32, np.uint64,
+                 np.float32, np.float64]
+        random.shuffle(types)
+        for i in xrange(5,9):
+            self._test(dim=i, dtype=types[i%len(types)], is_inplace=True,
+                    size_min=3, size_max=4, naxes=1)
 
     def perform_tests(self):
-        #self.test_2d_int_out_of_place()
-        #self.test_2d_float_out_of_place()
-        #self.test_3d_int_out_of_place()
-        #self.test_3d_float_out_of_place()
-        #self.test_4d_int_out_of_place()
+        self.test_2d_int_out_of_place()
+        self.test_2d_float_out_of_place()
+        self.test_3d_int_out_of_place()
+        self.test_3d_float_out_of_place()
+        self.test_4d_int_out_of_place()
         self.test_upper_dimensions_out_of_place()
         
         self.test_2d_int_inplace()
@@ -229,7 +235,7 @@ class TestTransposeOperator(object):
     
 if __name__ == '__main__':
     TestTransposeOperator.setup_class(enable_extra_tests=False, 
-                                      enable_debug_mode=True)
+                                      enable_debug_mode=False)
     
     test = TestTransposeOperator()
     test.perform_tests()
diff --git a/hysop/operator/transpose.py b/hysop/operator/transpose.py
index 348951ac0..4c3cd8ba9 100644
--- a/hysop/operator/transpose.py
+++ b/hysop/operator/transpose.py
@@ -56,7 +56,6 @@ class Transpose(ComputationalGraphNodeGenerator):
     @debug
     def __init__(self, fields, variables, axes,
                 output_fields=None,
-                #components_in=None, components_out=None,
                 implementation=None, 
                 name=None,
                 base_kwds=None, 
diff --git a/hysop/tools/misc.py b/hysop/tools/misc.py
index 6f3e2cd8a..8e10e0618 100644
--- a/hysop/tools/misc.py
+++ b/hysop/tools/misc.py
@@ -10,11 +10,11 @@
 from hysop.deps import inspect, np, functools, operator
 from hysop.constants import HYSOP_REAL, HYSOP_INTEGER
 
-def prod(values, neutral=1):
+def prod(values):
     """
     Like sum but for products.
     """
-    return functools.reduce(operator.mul, values, neutral)
+    return np.prod(values, dtype=np.int64)
 
 def get_default_args(func):
     """
@@ -213,11 +213,10 @@ class WorkSpaceTools(object):
         """
         from hysop.tools.numpywrappers import npw
         result = []
-        # print subshape, len(subshape), np.prod(subshape[0])
         if isinstance(subshape, list):
-            subsize = [np.prod(subshape[i]) for i in xrange(len(subshape))]
+            subsize = [prod(subshape[i]) for i in xrange(len(subshape))]
         else:
-            subsize = [np.prod(subshape), ] * lwork
+            subsize = [prod(subshape), ] * lwork
             subshape = [subshape, ] * lwork
         if work is None:
             for i in xrange(lwork):
@@ -352,7 +351,7 @@ class WorkSpaceTools(object):
         for prop in properties:
             lp = len(prop)
             for i in xrange(lp):
-                shapes[i] = tuple(np.maximum(shapes[i], np.prod(prop[i])))
+                shapes[i] = tuple(np.maximum(shapes[i], prod(prop[i])))
         work = [npw.zeros(shape) for shape in shapes]
 
         return work
diff --git a/hysop/tools/types.py b/hysop/tools/types.py
index 16fdf053f..0c72c3b5d 100644
--- a/hysop/tools/types.py
+++ b/hysop/tools/types.py
@@ -1,6 +1,7 @@
 
 from hysop.deps import np
 from collections import Iterable
+from hysop.tools.misc import prod
 
 class InstanceOf(object):
     def __init__(self, cls):
@@ -152,7 +153,7 @@ def check_instance(val, cls, allow_none=False, **kargs):
         maxval = kargs.pop('maxval', None)
         all_val_cls = kargs.pop('values', None)
         if shape and size:
-            assert np.prod(shape)==size
+            assert prod(shape)==size
         if dtype and (dtype != val.dtype):
             msg='np.ndarray dtype does not match, expected {} but got {}.'
             msg=msg.format(dtype, val.dtype)
diff --git a/hysop/topology/cartesian_topology.py b/hysop/topology/cartesian_topology.py
index 7a3e6add9..026ed5034 100644
--- a/hysop/topology/cartesian_topology.py
+++ b/hysop/topology/cartesian_topology.py
@@ -15,7 +15,7 @@ from hysop.domain.box import Box, BoxView
 from hysop.core.mpi import MPI
 from hysop.tools.types import check_instance, to_tuple, first_not_None
 from hysop.tools.parameters import Discretization, MPIParams
-from hysop.tools.misc import Utils
+from hysop.tools.misc import Utils, prod
 from hysop.tools.decorators import debug, deprecated
 from hysop.tools.numpywrappers import npw
 from hysop.tools.string_utils import prepend
@@ -688,7 +688,7 @@ class CartesianTopology(CartesianTopologyView, Topology):
             msg = 'The given cartesian topology does not fit with the others'
             msg += ' input parameters.'
             assert (dim is None) or (cartesian_topology.dim == dim), msg
-            assert (shape is None) or (cartesian_topology.size == np.prod(shape)), msg
+            assert (shape is None) or (cartesian_topology.size == prod(shape)), msg
             assert (shape is None) or (shape ==
                     npw.asintegerarray(cartesian_topology.dims)).all(), msg
             assert (is_periodic is None) or (is_periodic == cartesian_topology.periods), msg
@@ -753,13 +753,13 @@ class CartesianTopology(CartesianTopologyView, Topology):
             is_distributed[-1] = True
         cart_shape = shape[is_distributed]
         cart_dim  = cart_shape.size
-        cart_size = np.prod(cart_shape)
+        cart_size = prod(cart_shape)
         
         is_periodic    = is_periodic * is_distributed
         is_distributed = is_distributed
 
         assert (cart_dim>0) and (cart_dim <= domain_dim)
-        assert np.prod(cart_shape) == np.prod(shape)
+        assert prod(cart_shape) == prod(shape)
         assert (cart_shape <= shape[is_distributed]).all()
         assert cart_size <= parent_size
         assert is_periodic.size    == domain_dim
-- 
GitLab