From fd2f98aa6e85921cd8e91e5c0d2f1f0189360594 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Keck <Jean-Baptiste.Keck@imag.fr> Date: Fri, 7 Apr 2017 16:56:17 +0200 Subject: [PATCH] codegen debug --- hysop/__init__.py | 2 +- hysop/backend/arrays/opencl_array.py | 1 + hysop/backend/arrays/opencl_backend.py | 3 +- hysop/backend/codegen/base/cl_extensions.py | 2 +- hysop/backend/codegen/base/codegen.py | 84 ++--- hysop/backend/codegen/base/enum_codegen.py | 2 +- .../backend/codegen/base/function_codegen.py | 13 +- hysop/backend/codegen/base/kernel_codegen.py | 7 +- hysop/backend/codegen/base/opencl_codegen.py | 5 +- hysop/backend/codegen/base/statistics.py | 52 +-- hysop/backend/codegen/base/struct_codegen.py | 2 +- hysop/backend/codegen/base/test.py | 22 +- hysop/backend/codegen/base/variables.py | 12 +- .../codegen/functions/advection_rhs.py | 10 +- .../codegen/functions/apply_stencil.py | 66 ++-- hysop/backend/codegen/functions/cache_load.py | 11 +- .../codegen/functions/compute_index.py | 2 +- hysop/backend/codegen/functions/empty.py | 2 +- hysop/backend/codegen/functions/gradient.py | 4 +- .../backend/codegen/functions/runge_kutta.py | 7 +- .../codegen/functions/stretching_rhs.py | 34 +- hysop/backend/codegen/kernels/bandwidth.py | 6 +- hysop/backend/codegen/kernels/copy_kernel.py | 12 +- .../codegen/kernels/directional_advection.py | 33 +- .../codegen/kernels/directional_stretching.py | 44 +-- hysop/backend/codegen/kernels/empty.py | 2 +- hysop/backend/codegen/kernels/stretching.py | 2 +- .../tests/test_directional_advection.py | 7 +- .../tests/test_directional_stretching.py | 23 +- hysop/backend/codegen/kernels/transpose.py | 2 +- hysop/backend/codegen/structs/mesh_info.py | 49 +-- hysop/backend/opencl/cltypes.py | 333 ------------------ hysop/backend/opencl/discrete.py | 8 +- hysop/backend/opencl/opencl_discrete.py | 6 +- .../opencl/{clenv.py => opencl_env.py} | 4 +- hysop/backend/opencl/tools.py | 10 +- hysop/deps.py | 2 +- hysop/numerics/fftw_f/fft3d.f90 | 2 +- hysop/numerics/stencil/stencil.py | 50 +-- hysop/old/gpu.old/QtRendering.py | 10 +- hysop/old/gpu.old/gpu_diffusion.py | 12 +- hysop/old/gpu.old/gpu_discrete.py | 16 +- hysop/old/gpu.old/gpu_kernel.py | 4 +- .../gpu.old/gpu_multiphase_baroclinic_rhs.py | 8 +- .../old/gpu.old/gpu_multiresolution_filter.py | 16 +- hysop/old/gpu.old/gpu_particle_advection.py | 28 +- .../old/gpu.old/gpu_particle_advection_dir.py | 12 +- hysop/old/gpu.old/gpu_stretching.py | 8 +- .../gpu.old/multi_gpu_particle_advection.py | 16 +- .../static_gpu_particle_advection_dir.py | 16 +- hysop/old/gpu.old/tests/test_copy.py | 16 +- .../gpu.old/tests/test_opencl_environment.py | 6 +- hysop/old/gpu.old/tests/test_transposition.py | 50 +-- hysop/old/gpu.old/tools.py | 16 +- hysop/old/gpu.old/visu/marchingcube.py | 2 +- hysop/tools/sympy_utils.py | 13 +- 56 files changed, 418 insertions(+), 769 deletions(-) delete mode 100644 hysop/backend/opencl/cltypes.py rename hysop/backend/opencl/{clenv.py => opencl_env.py} (99%) diff --git a/hysop/__init__.py b/hysop/__init__.py index 34130caa9..9fa51f813 100644 --- a/hysop/__init__.py +++ b/hysop/__init__.py @@ -24,7 +24,7 @@ __PROFILE__ = "OFF" in ["0", "1"] __ENABLE_LONG_TESTS__ = "OFF" is "ON" # OpenCL -__DEFAULT_PLATFORM_ID__ = 0 +__DEFAULT_PLATFORM_ID__ = 1 __DEFAULT_DEVICE_ID__ = 0 diff --git a/hysop/backend/arrays/opencl_array.py b/hysop/backend/arrays/opencl_array.py index 22e25bf16..d526a27c8 100644 --- a/hysop/backend/arrays/opencl_array.py +++ b/hysop/backend/arrays/opencl_array.py @@ -1,4 +1,5 @@ + import numpy as np from hysop.backend.opencl import clArray, clTools from hysop.backend.arrays import MemoryType, MemoryOrdering diff --git a/hysop/backend/arrays/opencl_backend.py b/hysop/backend/arrays/opencl_backend.py index 346ab6c2e..584d55671 100644 --- a/hysop/backend/arrays/opencl_backend.py +++ b/hysop/backend/arrays/opencl_backend.py @@ -25,6 +25,7 @@ default_cl_context = default_cl_env.context() default_cl_queue = default_cl_env.default_queue() default_cl_allocator = default_cl_env.allocator() default_cl_pool = default_cl_env.memory_pool() + class _ElementwiseKernel(object): """ @@ -2157,7 +2158,7 @@ class OpenClBackend(ArrayBackend): if is_integer(x1): expr = '(x1[i]==0 ? 0 : {})'.format(expr) convert_inputs=(np.float64,None) - if is_fp(x2): + if is_fp(x1) or is_fp(x2): expr = '(isnan(x0[i])||isnan(x1[i])?NAN:(isinf(x1[i])?(isinf(x0[i])?NAN:(signbit(x0[i])^signbit(x1[i])?x1[i]:x0[i])):{}))'.format(expr) convert_inputs='f' expr='y0[i] = '+expr diff --git a/hysop/backend/codegen/base/cl_extensions.py b/hysop/backend/codegen/base/cl_extensions.py index dae06467a..75c042c99 100644 --- a/hysop/backend/codegen/base/cl_extensions.py +++ b/hysop/backend/codegen/base/cl_extensions.py @@ -17,7 +17,7 @@ _cl_extension_custom_declarations = { } from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.test import _test_typegen class ClExtCodeGen(OpenClCodeGenerator): diff --git a/hysop/backend/codegen/base/codegen.py b/hysop/backend/codegen/base/codegen.py index 952578727..a7572c3e7 100644 --- a/hysop/backend/codegen/base/codegen.py +++ b/hysop/backend/codegen/base/codegen.py @@ -314,10 +314,8 @@ class CodeGenerator(object): @contextmanager def _var_block_(self): vb = self.VarBlock() - try: - yield vb - finally: - self.append(vb.code()) + yield vb + self.append(vb.code()) class AlignBlock(object): @@ -366,13 +364,8 @@ class CodeGenerator(object): @contextmanager def _align_(self,sep='$'): ab = self.AlignBlock(sep=sep) - try: - yield ab - except: - raise - finally: - self.append(ab.code()) - + yield ab + self.append(ab.code()) def declare_codeblocks(self,names,initial_code=None,priorities=None,comments=None): if not isinstance(names,list): @@ -412,16 +405,14 @@ class CodeGenerator(object): elif priority is not None and self.blocks[blockname][0] != priority: raise ValueError('Priority mismatch!') - try: - code = self.code - indent = self.indent_level - self.indent_level = self.initial_indent_level - self.code = self.blocks[blockname][1] - yield - finally: - self.blocks[blockname][1] = self.code - self.code = code - self.indent_level = indent + code = self.code + indent = self.indent_level + self.indent_level = self.initial_indent_level + self.code = self.blocks[blockname][1] + yield + self.blocks[blockname][1] = self.code + self.code = code + self.indent_level = indent def block_exists(self,name): return (name in self.blocks) @@ -451,38 +442,35 @@ class CodeGenerator(object): def _block_(self,header_prefix='',header_postfix='',footer_postfix='',compact=False): count = 1-int(compact) newline = not compact - try: - header = header_prefix + '{' + header_postfix - self.append(header,newline).indent(count) - yield - finally: - if compact: - self.supress_newline() - self.code += ' ' - footer = '}' + footer_postfix - self.dedent(count).append(footer) + + header = header_prefix + '{' + header_postfix + self.append(header,newline).indent(count) + yield + if compact: + self.supress_newline() + self.code += ' ' + footer = '}' + footer_postfix + self.dedent(count).append(footer) #conditional facilities @contextmanager def _if_(self,cond,compact=False,force_spaces=False): - try: - if cond=='true': - yield - elif cond=='false': - code = self.code - indent = self.indent_level - self.code = '' - yield - else: - sep = self.space() if force_spaces else self.empty() - header_prefix = 'if (' + sep + cond + sep + ') ' - with self._block_(header_prefix=header_prefix,compact=compact) as b: - yield b - finally: - if cond=='false': - self.code = code - self.indent_level = indent + if cond=='true': + yield + elif cond=='false': + code = self.code + indent = self.indent_level + self.code = '' + yield + else: + sep = self.space() if force_spaces else self.empty() + header_prefix = 'if (' + sep + cond + sep + ') ' + with self._block_(header_prefix=header_prefix,compact=compact) as b: + yield b + if cond=='false': + self.code = code + self.indent_level = indent @contextmanager def _elif_(self,cond,compact=False,force_spaces=True): sep = self.space() if force_spaces else self.empty() diff --git a/hysop/backend/codegen/base/enum_codegen.py b/hysop/backend/codegen/base/enum_codegen.py index b997591fa..6741e2325 100644 --- a/hysop/backend/codegen/base/enum_codegen.py +++ b/hysop/backend/codegen/base/enum_codegen.py @@ -2,7 +2,7 @@ import numpy as np import pyopencl as cl from hysop.backend.codegen.base.codegen import CodeGenerator -from hysop.backend.codegen.base.types import TypeGen +from hysop.backend.opencl.opencl_types import TypeGen from hysop.tools.enum import EnumFactory from hysop.backend.codegen.base.variables import register_ctype_dtype diff --git a/hysop/backend/codegen/base/function_codegen.py b/hysop/backend/codegen/base/function_codegen.py index 654c9a4f7..3e10b411b 100644 --- a/hysop/backend/codegen/base/function_codegen.py +++ b/hysop/backend/codegen/base/function_codegen.py @@ -2,10 +2,11 @@ import pyopencl as cl from contextlib import contextmanager +from hysop.tools.types import check_instance from hysop.backend.codegen.base.codegen import CodeGenerator from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.utils import ArgDict -from hysop.backend.codegen.base.types import TypeGen, OpenClTypeGen +from hysop.backend.opencl.opencl_types import TypeGen, OpenClTypeGen from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin, CodegenVectorClBuiltinFunc class FunctionBase(object): @@ -17,7 +18,7 @@ class FunctionBase(object): super(FunctionBase,self).__init__(typegen=typegen,**kargs) known_args = ArgDict() if (known_args is None) else known_args - assert isinstance(fargs, ArgDict) + check_instance(fargs,ArgDict) fargs.release() for varname, varval in known_args.iteritems(): @@ -88,11 +89,11 @@ class FunctionCodeGenerator(FunctionBase, CodeGenerator): def __init__(self,basename,typegen,output, args=None,known_args=None,inline=False,ext='.tmp'): - assert isinstance(typegen, TypeGen) + check_instance(typegen,TypeGen) name = basename fname = basename if args is not None: - assert isinstance(args, ArgDict) + check_instance(args,ArgDict) fname += args.function_name_suffix(output) name += args.codegen_name_suffix(output) @@ -117,11 +118,11 @@ class OpenClFunctionCodeGenerator(FunctionBase, OpenClCodeGenerator): def __init__(self,basename,typegen,output, args=None,known_args=None,inline=False,ext='.cl'): - assert isinstance(typegen, OpenClTypeGen) + check_instance(typegen,OpenClTypeGen) name = basename fname = basename if args is not None: - assert isinstance(args, ArgDict) + check_instance(args,ArgDict) fname += args.function_name_suffix(output) name += args.codegen_name_suffix(output) diff --git a/hysop/backend/codegen/base/kernel_codegen.py b/hysop/backend/codegen/base/kernel_codegen.py index 859ed8db1..64b17dcff 100644 --- a/hysop/backend/codegen/base/kernel_codegen.py +++ b/hysop/backend/codegen/base/kernel_codegen.py @@ -1,10 +1,11 @@ from contextlib import contextmanager +from hysop.tools.types import check_instance from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.function_codegen import FunctionBase from hysop.backend.codegen.base.utils import VarDict, ArgDict, WriteOnceDict -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin, \ CodegenVectorClBuiltinFunc @@ -27,8 +28,8 @@ class KernelCodeGenerator(KernelBase, OpenClCodeGenerator): kernel_args = ArgDict() if (kernel_args is None) else kernel_args known_vars = WriteOnceDict() if (known_vars is None) else known_vars - assert isinstance(typegen, OpenClTypeGen) - assert isinstance(kernel_args, ArgDict) + check_instance(typegen,OpenClTypeGen) + check_instance(kernel_args,ArgDict) assert work_dim>0 and work_dim<=3 if (vec_type_hint is not None): diff --git a/hysop/backend/codegen/base/opencl_codegen.py b/hysop/backend/codegen/base/opencl_codegen.py index 9179e7011..7891a18a1 100644 --- a/hysop/backend/codegen/base/opencl_codegen.py +++ b/hysop/backend/codegen/base/opencl_codegen.py @@ -2,8 +2,9 @@ import pyopencl as cl from contextlib import contextmanager +from hysop.tools.types import check_instance from hysop.backend.codegen.base.codegen import CodeGenerator -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import VarDict from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin, CodegenVectorClBuiltinFunc @@ -31,7 +32,7 @@ class OpenClCodeGenerator(CodeGenerator): known_vars=None, declare_cl_exts=True, **kargs): - assert isinstance(typegen, OpenClTypeGen) + check_instance(typegen,OpenClTypeGen) super(OpenClCodeGenerator,self).__init__(name=name,typegen=typegen, ext=ext, known_vars=known_vars, keywords=self.default_keywords, escape_seqs=self.default_escape_seqs, **kargs) diff --git a/hysop/backend/codegen/base/statistics.py b/hysop/backend/codegen/base/statistics.py index 6115d9d0c..25de0f49d 100644 --- a/hysop/backend/codegen/base/statistics.py +++ b/hysop/backend/codegen/base/statistics.py @@ -2,8 +2,9 @@ import copy import numpy as np +from hysop.tools.types import check_instance from hysop.tools.units import bytes2str -from hysop.backend.codegen.base.types import vsizes, signed_base_types, unsigned_base_types, float_base_types +from hysop.backend.opencl.opencl_types import vsizes, signed_base_types, unsigned_base_types, float_base_types dtype_ops = { np.int8: (0.25, 'IOPS'), @@ -44,7 +45,7 @@ _fill_dtype_ops() class WorkStatistics(object): def __init__(self, stat=None): if (stat is not None): - assert isinstance(stat, WorkStatistics) + check_instance(stat,WorkStatistics) self.global_mem_byte_reads = stat.global_mem_byte_reads self.global_mem_byte_writes = stat.global_mem_byte_writes self.local_mem_byte_reads = stat.local_mem_byte_reads @@ -62,23 +63,28 @@ class WorkStatistics(object): def global_mem_transactions(self): return self.global_mem_byte_writes + self.global_mem_byte_reads - def global_mem_write_ratio(self): + def global_mem_rw_ratio(self): return float(self.global_mem_byte_writes)/self.global_mem_transactions() def global_mem_read_ratio(self): return float(self.global_mem_byte_reads)/self.global_mem_transactions() def local_mem_transactions(self): return self.local_mem_byte_writes + self.local_mem_byte_reads - def local_mem_write_ratio(self): + def local_mem_rw_ratio(self): return float(self.local_mem_byte_writes)/self.local_mem_transactions() def local_mem_read_ratio(self): return float(self.local_mem_byte_reads)/self.local_mem_transactions() def total_mem_transactions(self): return self.local_mem_transactions() + self.global_mem_transactions() + + def has_local_mem_transactions(self): + return (self.local_mem_transactions() > 0) + def has_global_mem_transactions(self): + return (self.global_mem_transactions() > 0) def __add__(self, rhs): - assert isinstance(rhs, WorkStatistics) + check_instance(rhs,WorkStatistics) stats = copy.deepcopy(self) stats.global_mem_byte_reads += rhs.global_mem_byte_reads stats.global_mem_byte_writes += rhs.global_mem_byte_writes @@ -93,7 +99,7 @@ class WorkStatistics(object): return stats def __mul__(self, rhs): - assert isinstance(rhs,int) + check_instance(rhs,int) stats = copy.deepcopy(self) stats.global_mem_byte_reads *= rhs stats.global_mem_byte_writes *= rhs @@ -104,28 +110,30 @@ class WorkStatistics(object): return stats def __rmul__(self, lhs): - assert isinstance(lhs,int) + check_instance(lhs,int) return self.__mul__(lhs) def __str__(self): op_count = [''] + ['{}: {}'.format(k,v) for (k,v) in self.ops_per_type.iteritems() ] op_count = '\n '.join(op_count) - ss = \ -''':: Work Statistics :: - Global memory: load={} store={} total={} write_ratio={} - Local memory: load={} store={} total={} write_ratio={} - Operations count:{} -'''.format( - bytes2str(self.global_mem_byte_reads), - bytes2str(self.global_mem_byte_writes), - bytes2str(self.global_mem_transactions()), - round(self.global_mem_write_ratio(),2), - bytes2str(self.local_mem_byte_reads), - bytes2str(self.local_mem_byte_writes), - bytes2str(self.local_mem_transactions()), - round(self.local_mem_write_ratio(),2), - op_count) + ss = ':: Work Statistics ::' + + if self.has_global_mem_transactions(): + ss += '\n Global memory: load={} store={} total={} rw_ratio={}'.format( + bytes2str(self.global_mem_byte_reads), + bytes2str(self.global_mem_byte_writes), + bytes2str(self.global_mem_transactions()), + round(self.global_mem_rw_ratio(),2)) + + if self.has_local_mem_transactions(): + ss += '\n Local memory: load={} store={} total={} rw_ratio={}'.format( + bytes2str(self.local_mem_byte_reads), + bytes2str(self.local_mem_byte_writes), + bytes2str(self.local_mem_transactions()), + round(self.local_mem_rw_ratio(),2)) + + ss += '\n Operations count: {}'.format(op_count) return ss diff --git a/hysop/backend/codegen/base/struct_codegen.py b/hysop/backend/codegen/base/struct_codegen.py index d805e7346..41a85509e 100644 --- a/hysop/backend/codegen/base/struct_codegen.py +++ b/hysop/backend/codegen/base/struct_codegen.py @@ -5,7 +5,7 @@ import pyopencl.tools import numpy as np import re -from hysop.backend.codegen.base.types import np_dtype +from hysop.backend.opencl.opencl_types import np_dtype from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.variables import VarDict, CodegenVariable, CodegenVector, CodegenStruct, CodegenVectorClBuiltin from hysop.backend.codegen.base.variables import register_ctype_dtype diff --git a/hysop/backend/codegen/base/test.py b/hysop/backend/codegen/base/test.py index 79c32d764..9aacfa1cb 100644 --- a/hysop/backend/codegen/base/test.py +++ b/hysop/backend/codegen/base/test.py @@ -1,29 +1,18 @@ from hysop.constants import np -from hysop.backend.opencl import pyopencl -from hysop.constants import MeshDirection, TranspositionState +from hysop.backend.opencl import cl from hysop.backend.codegen.structs.mesh_info import MeshBaseStruct, MeshInfoStruct - -from hysop import __DEFAULT_PLATFORM_ID__, __DEFAULT_DEVICE_ID__ -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen ## Quickly get a default opencl typegen **ONLY** for testing or debugging purpose ## => do not use in production code def _test_typegen(fbtype='float',float_dump_mode='dec'): - try: - default_platform = pyopencl.get_platforms()[__DEFAULT_PLATFORM_ID__] - except: - default_platform = pyopencl.get_platforms()[0] - try: - default_device = default_platform.get_devices()[__DEFAULT_DEVICE_ID__] - except: - default_device = default_platform.get_devices()[0] - default_context = pyopencl.Context(devices=[default_device]) + from hysop.backend.arrays.opencl_backend import\ + default_cl_context, default_cl_device, default_cl_platform return OpenClTypeGen( - context=default_context, device=default_device, platform=default_platform, + context=default_cl_context, device=default_cl_device, platform=default_cl_platform, fbtype=fbtype,float_dump_mode=float_dump_mode) - def _test_mesh_info(typegen,dim,ghosts,resolution,**kargs): ghosts = [ghosts]*dim if np.isscalar(ghosts) else ghosts ghosts = np.asarray(ghosts)[:dim] @@ -62,7 +51,6 @@ def _test_mesh_info(typegen,dim,ghosts,resolution,**kargs): ghosts=ghosts, dx=dx, local_mesh=local_mesh, global_mesh=global_mesh, - mesh_dir=MeshDirection.X, mesh_state=TranspositionState.XYZ, **kargs) return (np_mis, cg_mis) diff --git a/hysop/backend/codegen/base/variables.py b/hysop/backend/codegen/base/variables.py index e86244d3f..9681eba4a 100644 --- a/hysop/backend/codegen/base/variables.py +++ b/hysop/backend/codegen/base/variables.py @@ -1,13 +1,13 @@ import re -import hysop.backend.codegen.base.types +import hysop.backend.opencl.opencl_types from hysop.constants import np from hysop.backend.codegen.base.utils import VarDict -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen # opencl extras -from hysop.backend.codegen.base.types import cl_type_to_dtype +from hysop.backend.opencl.opencl_types import cl_type_to_dtype _ctype_to_dtype = { 'bool': np.bool_, @@ -264,11 +264,13 @@ class CodegenVariable(object): return self.__call__() else: assert self.ptr, '{} is not a pointer!'.format(self.name) - def __str__(self): + def __repr__(self): if self.is_symbolic(): return '{}({})'.format(self.name,self.ctype) else: return '{}({},{})'.format(self.name,self.ctype,self.value) + def __str__(self): + return self.sval() class CodegenArray(CodegenVariable): @staticmethod @@ -685,7 +687,7 @@ class CodegenStruct(CodegenVariable): var = var_overrides[fieldname] var.struct_var = self var.const = self.const - elif is_struct and (ctype in hysop.backend.codegen.base.types.builtin_types): + elif is_struct and (ctype in hysop.backend.opencl.opencl_types.builtin_types): tg=struct.typegen btype = tg.basetype(ctype) dim = tg.components(ctype) diff --git a/hysop/backend/codegen/functions/advection_rhs.py b/hysop/backend/codegen/functions/advection_rhs.py index fa927ce37..2e9add494 100644 --- a/hysop/backend/codegen/functions/advection_rhs.py +++ b/hysop/backend/codegen/functions/advection_rhs.py @@ -1,15 +1,16 @@ +from hysop.tools.types import check_instance from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.function_codegen import OpenClFunctionCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import WriteOnceDict, ArgDict from hysop.backend.codegen.base.statistics import WorkStatistics from hysop.constants import BoundaryCondition from hysop.backend.codegen.functions.apply_stencil import ApplyStencilFunction -from hysop.numerics.stencil.stencil import Stencil1D, Stencil +from hysop.numerics.stencil.stencil import Stencil # compute alpha*grad(u).w + (1-alpha)*grad(u)^T.w # where u may be is_cached and w is private @@ -22,12 +23,9 @@ class DirectionalAdvectionRhsFunction(OpenClFunctionCodeGenerator): known_args=None): assert work_dim>=1 and work_dim<=3 - assert boundary in BoundaryCondition.entries() + check_instance(boundary,BoundaryCondition) assert nparticles in [1,2,4,8,16] - boundary = BoundaryCondition.value(boundary) - sboundary = BoundaryCondition.svalue(boundary) - is_periodic = (boundary==BoundaryCondition.PERIODIC) if is_cached: diff --git a/hysop/backend/codegen/functions/apply_stencil.py b/hysop/backend/codegen/functions/apply_stencil.py index 98b0e3294..cd65cb631 100644 --- a/hysop/backend/codegen/functions/apply_stencil.py +++ b/hysop/backend/codegen/functions/apply_stencil.py @@ -1,8 +1,10 @@ +from hysop.deps import sm +from hysop.tools.types import check_instance from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.function_codegen import OpenClFunctionCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import ArgDict from hysop.backend.codegen.base.statistics import WorkStatistics @@ -11,6 +13,7 @@ from hysop.numerics.stencil.stencil import Stencil class ApplyStencilFunction(OpenClFunctionCodeGenerator): def __init__(self,typegen,stencil,ftype, + symbol2var=None, components=1, vectorize=True, extra_inputs=[], scalar_inputs=[], @@ -23,7 +26,14 @@ class ApplyStencilFunction(OpenClFunctionCodeGenerator): itype='int', known_args=None): - assert isinstance(stencil, Stencil) + check_instance(stencil,Stencil) + check_instance(symbol2var, dict, keys=sm.Symbol, values=CodegenVariable, allow_none=True) + assert set(symbol2var.keys())==stencil.variables() + + extra_inputs = set(extra_inputs + symbol2var.values()) + scalar_inputs = set(scalar_inputs) + vector_inputs = set(vector_inputs) + dim = stencil.dim vtype = typegen.vtype(ftype,components) @@ -37,20 +47,25 @@ class ApplyStencilFunction(OpenClFunctionCodeGenerator): for iname in vector_inputs: if vectorize: name = iname - args[name] = CodegenVariable(name, vtype, typegen, const=True, add_impl_const=True, storage=data_storage, ptr=True, restrict=restrict) + args[name] = CodegenVariable(name, vtype, typegen, const=True, + add_impl_const=True, storage=data_storage, ptr=True, restrict=restrict) else: for i in xrange(components): name = '{}{}'.format(iname,vector_suffixes[i]) - args[name] = CodegenVariable(name, ftype, typegen, const=True, add_impl_const=True, storage=data_storage, ptr=True, restrict=restrict) + args[name] = CodegenVariable(name, ftype, typegen, + const=True, add_impl_const=True, storage=data_storage, + ptr=True, restrict=restrict) for iname in scalar_inputs: name = iname - args[name] = CodegenVariable(name, ftype, typegen, const=True, add_impl_const=True, storage=data_storage, ptr=True, restrict=restrict) + args[name] = CodegenVariable(name, ftype, typegen, const=True, + add_impl_const=True, storage=data_storage, ptr=True, restrict=restrict) for arg in extra_inputs: args[arg.name] = arg if not has_custom_id: args['offset'] = CodegenVariable('offset', itype, typegen, add_impl_const=True) - args['stride'] = CodegenVectorClBuiltin('stride', itype, dim, typegen, add_impl_const=True,nl=True) + args['stride'] = CodegenVectorClBuiltin('stride', itype, dim, typegen, + add_impl_const=True,nl=True) for varname,vartype in multipliers.iteritems(): if vartype=='ftype': vartype=ftype @@ -81,6 +96,7 @@ class ApplyStencilFunction(OpenClFunctionCodeGenerator): self.has_custom_id = has_custom_id self.custom_id = custom_id self.op = op + self.symbol2var = symbol2var self.gencode() @@ -101,7 +117,7 @@ class ApplyStencilFunction(OpenClFunctionCodeGenerator): cached = self.is_cached() ncoeffs = self.stencil.non_zero_coefficients() - nmul = len(self.multipliers) + nmul = len(self.multipliers)+int(self.stencil.has_factor()) stats = WorkStatistics() @@ -135,11 +151,8 @@ class ApplyStencilFunction(OpenClFunctionCodeGenerator): ftype = s.ftype vtype = s.vtype + stencil = s.stencil - has_multiplier = ('multiplier' in s.vars.keys()) - if has_multiplier: - multiplier = s.vars['multiplier'] - res = CodegenVectorClBuiltin('res',ftype,components,tg) with s._function_(): @@ -150,39 +163,44 @@ class ApplyStencilFunction(OpenClFunctionCodeGenerator): if i>0: al.jumpline() operands = {} - if self.vectorized: - vector_varnames = self.vector_inputs + if s.vectorized: + vector_varnames = s.vector_inputs _res = res() else: - vector_varnames = ['{}{}'.format(varname,vector_suffixes[i]) for varname in self.vector_inputs] + vector_varnames = ['{}{}'.format(varname,vector_suffixes[i]) + for varname in s.vector_inputs] _res = res[i] for j,vn in enumerate(vector_varnames): operands['vinput{}'.format(j)] = s.vars[vn]() - for j,vn in enumerate(self.scalar_inputs): + for j,vn in enumerate(s.scalar_inputs): operands['sinput{}'.format(j)] = s.vars[vn]() - for (off,coeff) in self.stencil.iteritems(): + for (off,coeff) in stencil.iteritems(include_factor=False, + svars=s.symbol2var): + if coeff=='0': + continue if not has_custom_id: offset = s.vars['offset'] stride = s.vars['stride'] strided='' for i in xrange(dim): - if off[i]==0: - continue if stride.known() and stride.value[0] == 1: strided+='{}'.format(tg.dump(off[i])) else: strided+='{}*{}'.format(tg.dump(off[i]),stride[i]) operands['id'] = '{}+{}'.format(offset(),strided) else: - operands['id'] = self.custom_id.format(offset=tg.dump(off[0])) - code = '{} += {} $* {};'.format(_res,tg.dump(coeff),self.op.format(**operands)) + operands['id'] = s.custom_id.format(offset=tg.dump(off[0])) + code = '{} += {} $* {};'.format(_res,tg.dump(coeff), + s.op.format(**operands)) al.append(code) if vectorized: break mul = '' - for mult in self.multipliers: + for mult in s.multipliers: mul+='{}*'.format(s.vars[mult]()) + if stencil.has_factor(): + mul+='{}*'.format(stencil.format_factor(s.symbol2var)) ret = 'return {}{};'.format(mul,res()) s.append(ret) @@ -190,16 +208,16 @@ class ApplyStencilFunction(OpenClFunctionCodeGenerator): if __name__ == '__main__': - from hysop.backend.codegen.base.test import test_typegen + from hysop.backend.codegen.base.test import _test_typegen stencil = Stencil([2.0,1.0,0.0,-1.0,-2.0], origin=2, order=2) - tg = test_typegen('double', float_dump_mode='hex') + tg = _test_typegen('double', float_dump_mode='hex') asf = ApplyStencilFunction(tg,stencil,ftype=tg.fbtype, components=3, vectorize=False, data_storage='__local', scalar_inputs = ['S'], vector_inputs=['A','B'], vector_suffixes=['x','y','z'], op='{sinput0}[{id}] * ({vinput0}[{id}] + {vinput1}[{id}])', multipliers={'a':'int','b':'float','c':'ftype','d':'vtype'}) - #print asf.per_work_statistics() + print asf.per_work_statistics() asf.edit() diff --git a/hysop/backend/codegen/functions/cache_load.py b/hysop/backend/codegen/functions/cache_load.py index 5adeba9be..1a815b24b 100644 --- a/hysop/backend/codegen/functions/cache_load.py +++ b/hysop/backend/codegen/functions/cache_load.py @@ -2,10 +2,11 @@ import contextlib from contextlib import contextmanager +from hysop.tools.types import check_instance from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.function_codegen import OpenClFunctionCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import ArgDict from hysop.backend.codegen.base.statistics import WorkStatistics @@ -32,18 +33,16 @@ class CacheLoadFunction(OpenClFunctionCodeGenerator): assert work_dim>0 - assert boundary in BoundaryCondition.entries() - boundary = BoundaryCondition.value(boundary) - sboundary = BoundaryCondition.svalue(boundary) + check_instance(boundary,BoundaryCondition) if boundary not in [BoundaryCondition.NONE, BoundaryCondition.PERIODIC]: - raise NotImplemented('Boundary \'{}\' not implemented yet!'.format(sboundary.lower())) + raise NotImplemented('Boundary \'{}\' not implemented yet!'.format(str(boundary).lower())) tg = typegen fs = force_symbolic vtype = tg.vtype(ftype,components) name = 'cache_load_{}d'.format(work_dim) if boundary != BoundaryCondition.NONE: - name+='_{}'.format(sboundary.lower()) + name+='_{}'.format(str(boundary).lower()) output = 'void' args = ArgDict() diff --git a/hysop/backend/codegen/functions/compute_index.py b/hysop/backend/codegen/functions/compute_index.py index dd052c855..d1de3c489 100644 --- a/hysop/backend/codegen/functions/compute_index.py +++ b/hysop/backend/codegen/functions/compute_index.py @@ -3,7 +3,7 @@ from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.function_codegen import OpenClFunctionCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import ArgDict from hysop.backend.codegen.base.statistics import WorkStatistics diff --git a/hysop/backend/codegen/functions/empty.py b/hysop/backend/codegen/functions/empty.py index 43832ee27..8070ef4ff 100644 --- a/hysop/backend/codegen/functions/empty.py +++ b/hysop/backend/codegen/functions/empty.py @@ -2,7 +2,7 @@ from hysop.backend.codegen.base.codegen import CodeGenerator from hysop.backend.codegen.base.function_codegen import FunctionCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable -from hysop.backend.codegen.base.types import TypeGen +from hysop.backend.opencl.opencl_types import TypeGen from hysop.backend.codegen.base.utils import ArgDict class EmptyFunction(FunctionCodeGenerator): diff --git a/hysop/backend/codegen/functions/gradient.py b/hysop/backend/codegen/functions/gradient.py index 6896626dd..20ff2eff2 100644 --- a/hysop/backend/codegen/functions/gradient.py +++ b/hysop/backend/codegen/functions/gradient.py @@ -4,10 +4,10 @@ import numpy as np from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.function_codegen import OpenClFunctionCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import ArgDict -from hysop.numerics.stencil.stencil import Stencil1D, Stencil +from hysop.numerics.stencil.stencil import Stencil from hysop.backend.codegen.functions.compute_index import ComputeIndexFunction from hysop.backend.codegen.functions.cache_load import CacheLoadFunction diff --git a/hysop/backend/codegen/functions/runge_kutta.py b/hysop/backend/codegen/functions/runge_kutta.py index bc57fadf9..035c12636 100644 --- a/hysop/backend/codegen/functions/runge_kutta.py +++ b/hysop/backend/codegen/functions/runge_kutta.py @@ -1,10 +1,11 @@ import hashlib, copy import numpy as np +from hysop.tools.types import check_instance from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.function_codegen import OpenClFunctionCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin, CodegenArray -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import ArgDict from hysop.backend.codegen.base.statistics import WorkStatistics @@ -23,8 +24,8 @@ class RungeKuttaFunction(OpenClFunctionCodeGenerator): used_vars=_default_used_vars, known_args=None): - assert isinstance(method, ExplicitRungeKutta) - assert isinstance(rhs, OpenClFunctionCodeGenerator) + check_instance(method,ExplicitRungeKutta) + check_instance(rhs,OpenClFunctionCodeGenerator) method.dump = typegen.dump #find out rhs function arguments diff --git a/hysop/backend/codegen/functions/stretching_rhs.py b/hysop/backend/codegen/functions/stretching_rhs.py index f7304c2ac..47b040e00 100644 --- a/hysop/backend/codegen/functions/stretching_rhs.py +++ b/hysop/backend/codegen/functions/stretching_rhs.py @@ -1,8 +1,10 @@ +from hysop.deps import sm +from hysop.tools.types import check_instance from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.function_codegen import OpenClFunctionCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import WriteOnceDict, ArgDict from hysop.backend.codegen.base.statistics import WorkStatistics from hysop.methods import StretchingFormulation @@ -33,14 +35,9 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): assert dim==3 assert direction<dim assert order>1 and order%2==0 - assert formulation in StretchingFormulation.entries() - assert boundary in BoundaryCondition.entries() + check_instance(formulation,StretchingFormulation) + check_instance(boundary,BoundaryCondition) - formulation = StretchingFormulation.value(formulation) - sformulation = StretchingFormulation.svalue(formulation) - boundary = BoundaryCondition.value(boundary) - sboundary = BoundaryCondition.svalue(boundary) - is_conservative = (formulation==StretchingFormulation.CONSERVATIVE) is_periodic = (boundary==BoundaryCondition.PERIODIC) @@ -52,7 +49,7 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): vtype = typegen.vtype(ftype,dim) (args,basename) = self.build_prototype(typegen,dim,itype,ftype,vtype,order,direction,cached, - restrict,storage,vectorize_u,used_variables,sformulation,is_conservative,is_periodic) + restrict,storage,vectorize_u,used_variables,formulation,is_conservative,is_periodic) reqs = self.build_requirements(typegen,dim,itype,ftype,vtype,order,direction,boundary,cached, restrict,storage,vectorize_u,used_variables,is_conservative,is_periodic,args) @@ -82,7 +79,7 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): self.gencode() def build_prototype(self,typegen,dim,itype,ftype,vtype,order,direction,cached, - restrict,storage, vectorize_u,used_variables,sformulation,is_conservative,is_periodic): + restrict,storage, vectorize_u,used_variables,formulation,is_conservative,is_periodic): U = used_variables['U'] W = used_variables['W'] @@ -115,7 +112,8 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): args['Lx'] = CodegenVariable('Lx', itype, typegen, add_impl_const=True, nl=True) args['active'] = CodegenVariable('active','bool',typegen) - basename = 'stretching_rhs_{}_{}{}{}_fdc{}'.format(sformulation.lower(),ftype[0],dim,('v' if vectorize_u else ''),order) + basename = 'stretching_rhs_{}_{}{}{}_fdc{}'.format(str(formulation).lower(), + ftype[0],dim,('v' if vectorize_u else ''),order) basename+='_'+XYZ[direction] return (args,basename) @@ -124,7 +122,7 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): assert order%2==0 h = order/2 sg = StencilGenerator() - sg.configure(derivative=2) + sg.configure(dim=1,derivative=1,order=order) stencil = sg.generate_exact_stencil(origin=h) return stencil @@ -157,8 +155,16 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): custom_id=None known_args={'stride':1} + inv_dx_s = sm.Symbol('inv_dx') + inv_dx_var = CodegenVariable('inv_dx', ftype, typegen, add_impl_const=True, nl=True) + + stencil = self.build_stencil(order) + stencil.replace_symbols({stencil.dx:1/inv_dx_s}) + symbol2var = {inv_dx_s:inv_dx_var} + apply_stencil = ApplyStencilFunction(typegen=typegen, - stencil=self.build_stencil(order), + stencil=stencil, + symbol2var=symbol2var, ftype=ftype, itype=itype, data_storage=storage, vectorize=vectorize_u, @@ -169,7 +175,6 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): vector_suffixes = xyz, op=op, custom_id=custom_id, - multipliers={'inv_dx':'ftype'}, known_args=known_args) reqs['apply_stencil'] = apply_stencil @@ -230,7 +235,6 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): dw_dt = CodegenVectorClBuiltin('dW_dt', ftype, dim, tg) ghosts = CodegenVariable('ghosts', itype, tg, const=True, value=order/2, symbolic_mode=True) - with s._function_(): s.jumpline() diff --git a/hysop/backend/codegen/kernels/bandwidth.py b/hysop/backend/codegen/kernels/bandwidth.py index 768be3aee..94ac571b7 100644 --- a/hysop/backend/codegen/kernels/bandwidth.py +++ b/hysop/backend/codegen/kernels/bandwidth.py @@ -12,12 +12,12 @@ from hysop.constants import np from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.kernel_codegen import KernelCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import WriteOnceDict, ArgDict from hysop.backend.opencl import cl, clCharacterize -from hysop.backend.opencl.tools import OpenCLEnvironment -from hysop.backend.opencl.gpu_kernel import KernelLauncher +from hysop.backend.opencl.opencl_env import OpenClEnvironment +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher from hysop.backend.opencl.kernel_autotuner import KernelAutotuner, AutotunerConfig class BandwidthKernel(KernelCodeGenerator): diff --git a/hysop/backend/codegen/kernels/copy_kernel.py b/hysop/backend/codegen/kernels/copy_kernel.py index 9299316e4..2274e881b 100644 --- a/hysop/backend/codegen/kernels/copy_kernel.py +++ b/hysop/backend/codegen/kernels/copy_kernel.py @@ -12,12 +12,12 @@ from hysop.constants import np from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.kernel_codegen import KernelCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import WriteOnceDict, ArgDict from hysop.backend.opencl import cl, clCharacterize -from hysop.backend.opencl.tools import OpenCLEnvironment -from hysop.backend.opencl.gpu_kernel import KernelLauncher +from hysop.backend.opencl.opencl_env import OpenClEnvironment +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher from hysop.backend.opencl.kernel_autotuner import KernelAutotuner, AutotunerConfig class CopyKernel(KernelCodeGenerator): @@ -115,8 +115,8 @@ class CopyKernel(KernelCodeGenerator): def autotune(cl_env, src, dst, vtype, count, restrict, build_opts, autotuner_config): - if not isinstance(cl_env,OpenCLEnvironment): - raise ValueError('cl_env is not an OpenCLEnvironment.') + if not isinstance(cl_env,OpenClEnvironment): + raise ValueError('cl_env is not an OpenClEnvironment.') if not isinstance(cl_env.typegen,OpenClTypeGen): raise ValueError('typegen is not an OpenClTypeGen.') @@ -211,7 +211,7 @@ class CopyKernel(KernelCodeGenerator): kernel_args=kernel_args, vectorized=vectorized, force_verbose=None,force_debug=None) - kernel_launcher = KernelLauncher(kernel, queue, list(gwi), list(lwi)) + kernel_launcher = OpenClKernelLauncher(kernel, queue, list(gwi), list(lwi)) return (stats, kernel_launcher, kernel_args, kernel_args_mapping, cached_bytes) candidates = [i for i in typegen.vsizes if work_size%i==0] diff --git a/hysop/backend/codegen/kernels/directional_advection.py b/hysop/backend/codegen/kernels/directional_advection.py index ec0741484..269e6b1a1 100644 --- a/hysop/backend/codegen/kernels/directional_advection.py +++ b/hysop/backend/codegen/kernels/directional_advection.py @@ -8,15 +8,16 @@ from hysop import __VERBOSE__, __KERNEL_DEBUG__ from hysop.backend.opencl import cl from hysop.tools.misc import Utils +from hysop.tools.types import check_instance from hysop.constants import np -from hysop.constants import DirectionLabels, MeshDirection, BoundaryCondition +from hysop.constants import DirectionLabels, BoundaryCondition from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.kernel_codegen import KernelCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin, \ CodegenArray -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import WriteOnceDict, ArgDict from hysop.backend.codegen.base.statistics import WorkStatistics @@ -31,10 +32,10 @@ from hysop.backend.codegen.functions.advection_rhs import DirectionalAdvectionR from hysop.numerics.odesolvers.runge_kutta import ExplicitRungeKutta from hysop.backend.opencl import cl, clCharacterize -from hysop.backend.opencl.tools import OpenCLEnvironment -from hysop.backend.opencl.gpu_kernel import KernelLauncher +from hysop.backend.opencl.opencl_env import OpenClEnvironment +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher from hysop.backend.opencl.kernel_autotuner import KernelAutotuner, AutotunerFlags, KernelGenerationError -from hysop.backend.opencl.gpu_discrete import GPUDiscreteField +from hysop.backend.opencl.opencl_discrete import OpenClDiscreteField class DirectionalAdvectionKernel(KernelCodeGenerator): @@ -54,10 +55,9 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): assert work_dim>0 and work_dim<=3 assert nparticles in [1,2,4,8,16] - assert boundary in BoundaryCondition.entries() - assert isinstance(rk_scheme, ExplicitRungeKutta) + check_instance(boundary,BoundaryCondition) + check_instance(rk_scheme, ExplicitRungeKutta) - boundary = BoundaryCondition.value(boundary) is_periodic = (boundary==BoundaryCondition.PERIODIC) assert boundary in [BoundaryCondition.PERIODIC, BoundaryCondition.NONE] assert (is_periodic and not is_cached) or min_ghosts>0 @@ -484,8 +484,8 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): build_opts, autotuner_config): - if not isinstance(cl_env,OpenCLEnvironment): - raise ValueError('cl_env is not an OpenCLEnvironment.') + if not isinstance(cl_env,OpenClEnvironment): + raise ValueError('cl_env is not an OpenClEnvironment.') if not isinstance(cl_env.typegen,OpenClTypeGen): raise ValueError('typegen is not an OpenClTypeGen.') @@ -517,8 +517,8 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): .format(rk_scheme.__class__.__name__) raise ValueError(msg) - if not isinstance(velocity,GPUDiscreteField): - raise ValueError('velocity is not a GPUDiscreteField') + if not isinstance(velocity,OpenClDiscreteField): + raise ValueError('velocity is not a OpenClDiscreteField') if (velocity.nb_components != dim): raise ValueError('Velocity vector components mismatch with dim {}.'.format(dim)) if not isinstance(position,cl.Buffer): @@ -531,13 +531,10 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): raise ValueError('compute_resolution mismatch in mesh_info.local_mesh, got {} and {}.'\ .format(compute_resolution,cr)) - if boundary not in BoundaryCondition.entries(): + if not isinstance(boundaryn, BoundaryCondition): msg = 'Unknown boundary condition \'{}\', valid ones are {}.'\ - .format(boundary,BoundaryCondition.fields().keys()) + .format(boundary,BoundaryCondition.svalues()) raise ValueError(msg) - else: - boundary = BoundaryCondition.value(boundary) - sboundary = BoundaryCondition.svalue(boundary) ghosts = mesh_info['ghosts'].value[:dim] cache_ghosts = DirectionalAdvectionKernel.cache_ghosts(max_dt, velocity_dx, max_velocity) @@ -661,7 +658,7 @@ class DirectionalAdvectionKernel(KernelCodeGenerator): force_verbose=None,force_debug=None, return_codegen=True) - kernel_launcher = KernelLauncher(kernel, queue, list(gwi), list(lwi)) + kernel_launcher = OpenClKernelLauncher(kernel, queue, list(gwi), list(lwi)) total_work = work_size[0]*work_size[1]*work_size[2] # per_work_statistics = codegen.per_work_statistics() diff --git a/hysop/backend/codegen/kernels/directional_stretching.py b/hysop/backend/codegen/kernels/directional_stretching.py index 72c2e83a6..b4b2196f2 100644 --- a/hysop/backend/codegen/kernels/directional_stretching.py +++ b/hysop/backend/codegen/kernels/directional_stretching.py @@ -4,6 +4,7 @@ from hysop.deps import np, operator, hashlib, contextlib from hysop import __VERBOSE__, __KERNEL_DEBUG__ from hysop.tools.misc import Utils +from hysop.tools.types import check_instance from hysop.constants import DirectionLabels, BoundaryCondition @@ -13,7 +14,7 @@ from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.kernel_codegen import KernelCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin, \ CodegenArray -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import WriteOnceDict, ArgDict from hysop.backend.codegen.base.statistics import WorkStatistics @@ -28,7 +29,7 @@ from hysop.backend.codegen.functions.stretching_rhs import DirectionalStretching from hysop.numerics.odesolvers.runge_kutta import ExplicitRungeKutta from hysop.backend.opencl import cl, clCharacterize -from hysop.backend.opencl.clenv import OpenCLEnvironment +from hysop.backend.opencl.opencl_env import OpenClEnvironment from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher from hysop.backend.opencl.kernel_autotuner import KernelAutotuner, AutotunerFlags, KernelGenerationError from hysop.backend.opencl.opencl_discrete import OpenClDiscreteField @@ -36,9 +37,9 @@ from hysop.backend.opencl.opencl_discrete import OpenClDiscreteField class DirectionalStretchingKernel(KernelCodeGenerator): @staticmethod - def codegen_name(ftype,is_cached,direction,sformulation): + def codegen_name(ftype,is_cached,direction,formulation): cache = 'cached_' if is_cached else '' - sformulation = sformulation.lower() + sformulation = str(formulation).lower() return 'directional_{}stretching_{}_{}{}'.format(cache,sformulation, ftype[0],DirectionLabels[direction]) @@ -52,19 +53,15 @@ class DirectionalStretchingKernel(KernelCodeGenerator): assert dim==3 assert direction<dim assert order>1 and order%2==0 - assert formulation in StretchingFormulation.entries() - assert boundary in BoundaryCondition.entries() - assert isinstance(rk_scheme, ExplicitRungeKutta) + check_instance(formulation, StretchingFormulation) + check_instance(boundary, BoundaryCondition) + check_instance(rk_scheme,ExplicitRungeKutta) if known_vars is None: known_vars = {} - boundary = BoundaryCondition.value(boundary) assert boundary in [BoundaryCondition.NONE,BoundaryCondition.PERIODIC] - formulation = StretchingFormulation.value(formulation) - sformulation = StretchingFormulation.svalue(formulation) - local_size_known = ('local_size' in known_vars) is_conservative = (formulation==StretchingFormulation.CONSERVATIVE) is_periodic = (boundary==BoundaryCondition.PERIODIC) @@ -77,7 +74,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): if is_conservative and not is_cached: raise ValueError('Conservetive stretching requires caching.') - name = DirectionalStretchingKernel.codegen_name(ftype,is_cached,direction,sformulation) + name = DirectionalStretchingKernel.codegen_name(ftype,is_cached,direction,formulation) kernel_reqs = self.build_requirements(typegen, dim, ftype, order, is_cached, rk_scheme, direction, boundary, symbolic_mode, formulation, storage, @@ -104,7 +101,6 @@ class DirectionalStretchingKernel(KernelCodeGenerator): self.boundary = boundary self.rk_scheme = rk_scheme self.formulation = formulation - self.sformulation = sformulation self.storage = storage self.local_size_known = local_size_known self.is_conservative = is_conservative @@ -623,8 +619,8 @@ class DirectionalStretchingKernel(KernelCodeGenerator): mesh_info, dt, build_opts, autotuner_config): - if not isinstance(cl_env,OpenCLEnvironment): - raise ValueError('cl_env is not an OpenCLEnvironment.') + if not isinstance(cl_env,OpenClEnvironment): + raise ValueError('cl_env is not an OpenClEnvironment.') if not isinstance(cl_env.typegen,OpenClTypeGen): raise ValueError('typegen is not an OpenClTypeGen.') @@ -657,13 +653,10 @@ class DirectionalStretchingKernel(KernelCodeGenerator): msg = 'Given Runge-Kutta scheme is not an instance of ExplicitRungeKutta, got a \'{}\'.'\ .format(rk_scheme.__class__.__name__) raise TypeError(msg) - if formulation not in StretchingFormulation.entries(): + if not isinstance(formulation, StretchingFormulation): msg = 'unknown stretching formulation \'{}\', valid ones are {}.'\ - .format(formulation,StretchingFormulation.svalues()) + .format(formulation.svalues()) raise TypeError(msg) - else: - formulation = StretchingFormulation.value(formulation) - sformulation = StretchingFormulation.svalue(formulation) if not isinstance(velocity,OpenClDiscreteField): raise ValueError('velocity is not a OpenClDiscreteField') @@ -678,13 +671,10 @@ class DirectionalStretchingKernel(KernelCodeGenerator): raise ValueError('compute_resolution mismatch in mesh_info.local_mesh, got {} and {}.'\ .format(compute_resolution,cr)) - if boundary not in BoundaryCondition.entries(): + if not isinstance(boundary, BoundaryCondition): msg = 'Unknown boundary condition \'{}\', valid ones are {}.'\ - .format(boundary,BoundaryCondition.fields().keys()) + .format(boundary,BoundaryCondition.svalues()) raise TypeError(msg) - else: - boundary = BoundaryCondition.value(boundary) - sboundary = BoundaryCondition.svalue(boundary) ghosts = mesh_info['ghosts'].value[:dim] min_ghosts = DirectionalStretchingKernel.min_ghosts(boundary, formulation, @@ -742,7 +732,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): ## CodeGenerator cached=True - name = DirectionalStretchingKernel.codegen_name(ftype, cached, direction, sformulation) + name = DirectionalStretchingKernel.codegen_name(ftype, cached, direction, formulation) codegen = DirectionalStretchingKernel(typegen=typegen, order=order, dim=dim, direction=direction, boundary=boundary, formulation=formulation, rk_scheme=rk_scheme, @@ -771,7 +761,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): return (kernel, kernel_args, src_hash, global_size) ## Kernel Autotuner - name = DirectionalStretchingKernel.codegen_name(ftype, False, direction, sformulation) + name = DirectionalStretchingKernel.codegen_name(ftype, False, direction, formulation) autotuner = KernelAutotuner(name=name, work_dim=dim, local_work_dim=1, build_opts=build_opts, autotuner_config=autotuner_config) diff --git a/hysop/backend/codegen/kernels/empty.py b/hysop/backend/codegen/kernels/empty.py index 0825ec983..c9a95056b 100644 --- a/hysop/backend/codegen/kernels/empty.py +++ b/hysop/backend/codegen/kernels/empty.py @@ -1,7 +1,7 @@ from hysop.backend.codegen.base.variables import CodegenVectorClBuiltin from hysop.backend.codegen.base.kernel_codegen import KernelCodeGenerator -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import ArgDict class EmptyKernel(KernelCodeGenerator): diff --git a/hysop/backend/codegen/kernels/stretching.py b/hysop/backend/codegen/kernels/stretching.py index f54ff811b..f370e5281 100644 --- a/hysop/backend/codegen/kernels/stretching.py +++ b/hysop/backend/codegen/kernels/stretching.py @@ -5,7 +5,7 @@ import numpy as np from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.kernel_codegen import KernelCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import WriteOnceDict, ArgDict from hysop.backend.codegen.structs.mesh_info import MeshBaseStruct, MeshInfoStruct diff --git a/hysop/backend/codegen/kernels/tests/test_directional_advection.py b/hysop/backend/codegen/kernels/tests/test_directional_advection.py index 62929b0cd..9e75de89f 100644 --- a/hysop/backend/codegen/kernels/tests/test_directional_advection.py +++ b/hysop/backend/codegen/kernels/tests/test_directional_advection.py @@ -3,9 +3,10 @@ import copy, math from hysop import __ENABLE_LONG_TESTS__ from hysop.backend.opencl import cl +from hysop.tools.types import check_instance from hysop.constants import np, BoundaryCondition from hysop.backend.codegen.base.test import _test_mesh_info , _test_typegen -from hysop.methods import ExplicitRungeKutta +from hysop.numerics.odesolvers.runge_kutta import ExplicitRungeKutta from hysop.backend.codegen.kernels.directional_advection import DirectionalAdvectionKernel class TestDirectionalAdvection(object): @@ -220,7 +221,7 @@ class TestDirectionalAdvection(object): .format('cached ' if cached else '', work_dim, rk_scheme.name(), - BoundaryCondition.svalue(boundary).lower(), + str(boundary).lower(), nparticles) print msg @@ -393,7 +394,7 @@ class TestDirectionalAdvection(object): def _check_kernels(self, rk_scheme): - assert isinstance(rk_scheme, ExplicitRungeKutta) + check_instance(rk_scheme,ExplicitRungeKutta) cached=[False,True] boundaries=[BoundaryCondition.PERIODIC] diff --git a/hysop/backend/codegen/kernels/tests/test_directional_stretching.py b/hysop/backend/codegen/kernels/tests/test_directional_stretching.py index 15fde42b8..235e864cc 100644 --- a/hysop/backend/codegen/kernels/tests/test_directional_stretching.py +++ b/hysop/backend/codegen/kernels/tests/test_directional_stretching.py @@ -3,10 +3,12 @@ import copy from hysop import __ENABLE_LONG_TESTS__ from hysop.backend.opencl import cl +from hysop.tools.types import check_instance from hysop.constants import np, BoundaryCondition from hysop.backend.codegen.base.test import _test_mesh_info , _test_typegen from hysop.backend.codegen.kernels.directional_stretching import DirectionalStretchingKernel -from hysop.methods import StretchingFormulation, ExplicitRungeKutta +from hysop.methods import StretchingFormulation +from hysop.numerics.odesolvers.runge_kutta import ExplicitRungeKutta class TestDirectionalStretching(object): @@ -201,8 +203,7 @@ class TestDirectionalStretching(object): W1 = [W0[i] + dt*K0[i] for i in xrange(3)] W = W1 else: - msg = 'Unknown stretching formulation scheme {}.'.format( - StretchingFormulation.svalue(formulation)) + msg = 'Unknown stretching formulation scheme {}.'.format(formulation) raise ValueError(msg) elif rk_scheme.name() == 'RK2': Wc = [ w.copy() for w in W ] @@ -238,8 +239,7 @@ class TestDirectionalStretching(object): W2 = [W0[i] + 1.0*dt*K1[i] for i in xrange(3)] W = W2 else: - msg = 'Unknown stretching formulation scheme {}.'.format( - StretchingFormulation.svalue(formulation)) + msg = 'Unknown stretching formulation scheme {}.'.format(formulation) raise ValueError(msg) elif rk_scheme.name() == 'RK4': Wc = [ w.copy() for w in W ] @@ -297,8 +297,7 @@ class TestDirectionalStretching(object): W4 = [W0[i] + dt*K[i] for i in xrange(3)] W = W4 else: - msg = 'Unknown stretching formulation scheme {}.'.format( - StretchingFormulation.svalue(formulation)) + msg = 'Unknown stretching formulation scheme {}.'.format(formulation) raise ValueError(msg) else: msg = 'Unknown Runge-Kutta scheme {}.'.format(rk_scheme) @@ -312,11 +311,11 @@ class TestDirectionalStretching(object): msg = '\nTesting {}{} with order {} and scheme {} in direction {} with {} boundaries.'\ .format('cached ' if cached else '', - StretchingFormulation.svalue(formulation).lower(), + str(formulation).lower(), order, rk_scheme.name(), direction, - BoundaryCondition.svalue(boundary).lower()) + str(boundary).lower()) print msg dt = self.dt @@ -463,8 +462,8 @@ class TestDirectionalStretching(object): def _check_kernels(self, formulation, rk_scheme): - assert formulation in StretchingFormulation.entries() - assert isinstance(rk_scheme, ExplicitRungeKutta) + check_instance(formulation,StretchingFormulation) + check_instance(rk_scheme,ExplicitRungeKutta) cached=[False,True] boundaries=[BoundaryCondition.NONE, BoundaryCondition.PERIODIC] @@ -558,7 +557,7 @@ class TestDirectionalStretching(object): if __name__ == '__main__': - TestDirectionalStretching.setup_class(do_extra_tests=False, enable_error_plots=True) + TestDirectionalStretching.setup_class(do_extra_tests=True, enable_error_plots=False) test = TestDirectionalStretching() test.test_stretching_gradUW_Euler() diff --git a/hysop/backend/codegen/kernels/transpose.py b/hysop/backend/codegen/kernels/transpose.py index d3723de2f..43d94839d 100644 --- a/hysop/backend/codegen/kernels/transpose.py +++ b/hysop/backend/codegen/kernels/transpose.py @@ -4,7 +4,7 @@ import numpy as np from hysop.backend.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.codegen.base.kernel_codegen import KernelCodeGenerator from hysop.backend.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin, CodegenArray -from hysop.backend.codegen.base.types import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen from hysop.backend.codegen.base.utils import WriteOnceDict, ArgDict from hysop.backend.codegen.functions.compute_index import ComputeIndexFunction diff --git a/hysop/backend/codegen/structs/mesh_info.py b/hysop/backend/codegen/structs/mesh_info.py index f73a063ef..3512c5307 100644 --- a/hysop/backend/codegen/structs/mesh_info.py +++ b/hysop/backend/codegen/structs/mesh_info.py @@ -1,16 +1,9 @@ -from hysop.constants import np +from hysop.deps import np +from hysop.tools.types import check_instance from hysop.backend.codegen.base.enum_codegen import EnumCodeGenerator from hysop.backend.codegen.base.struct_codegen import StructCodeGenerator -from hysop.backend.codegen.base.types import OpenClTypeGen - -# class MeshDirectionEnum(EnumCodeGenerator): - # def __init__(self): - # super(MeshDirectionEnum,self).__init__(MeshDirection) - -# class TranspositionStateEnum(EnumCodeGenerator): - # def __init__(self): - # super(TranspositionStateEnum,self).__init__(TranspositionState) +from hysop.backend.opencl.opencl_types import OpenClTypeGen class MeshBaseStruct(StructCodeGenerator): def __init__(self, typegen, typedef=None): @@ -119,21 +112,6 @@ class MeshInfoStruct(StructCodeGenerator): _append( ('global_mesh', mesh_base.dtype) ) i+=2 - # mesh_dir = MeshDirectionEnum() - # mesh_dir_field = i - # _append( ('direction', mesh_dir.dtype) ) - # i+=1 - - # mesh_state = TranspositionStateEnum() - # mesh_state_field = i - # _append( ('state', mesh_state.dtype) ) - # i+=1 - - # ctype_overrides = { - # mesh_dir_field:mesh_dir.ctype, - # mesh_state_field:mesh_state.ctype - # } - comments = [ "Dimension of the mesh", "Index of the first local compute point in the global grid", @@ -143,11 +121,9 @@ class MeshInfoStruct(StructCodeGenerator): "1/dx", "Local mesh", "Global mesh", - "Current memory aligned component", - "Current components reordering state" ] - return dtypes, comments, ctype_overrides, [mesh_base, mesh_dir, mesh_state] + return dtypes, comments, None, [mesh_base] def create(self, name, @@ -155,7 +131,6 @@ class MeshInfoStruct(StructCodeGenerator): start, stop, ghosts, dx, local_mesh, global_mesh, - mesh_dir, mesh_state, **kargs): if dim>4: @@ -166,9 +141,6 @@ class MeshInfoStruct(StructCodeGenerator): dtype,_,_,_ = MeshInfoStruct.build_dtype(tg) - # direction = MeshDirection.value(mesh_dir) - # state = TranspositionState.value(mesh_state) - dx = np.asarray(dx) mesh_info_vals = { 'dim' : tg.make_intn(dim,1), @@ -178,9 +150,7 @@ class MeshInfoStruct(StructCodeGenerator): 'dx' : tg.make_floatn(dx,4), 'inv_dx' : tg.make_floatn(1.0/dx,4), 'local_mesh' : local_mesh[0], - 'global_mesh': global_mesh[0], - # 'direction' : direction, - # 'state' : state + 'global_mesh': global_mesh[0] } def extend(var,d=0): @@ -204,8 +174,6 @@ class MeshInfoStruct(StructCodeGenerator): var_overrides = dict( local_mesh=local_mesh[1], global_mesh=global_mesh[1], - # direction = MeshDirection.variable('direction',tg,mesh_dir), - # state = TranspositionState.variable('state',tg,mesh_state) ) cg_var = self.build_codegen_variable(name=name, value=value,var_overrides=var_overrides, @@ -218,12 +186,9 @@ class MeshInfoStruct(StructCodeGenerator): @staticmethod def create_from_mesh(name,cl_env, mesh, - # mesh_dir=MeshDirection.X, mesh_state=TranspositionState.XYZ, **kargs): from hysop.domain.mesh import Mesh - assert isinstance(mesh,Mesh) - # assert mesh_dir in MeshDirection.entries() - # assert mesh_state in TranspositionState.entries() + check_instance(mesh,Mesh) tg = cl_env.typegen @@ -273,8 +238,6 @@ class MeshInfoStruct(StructCodeGenerator): var_overrides = dict( local_mesh = self.mesh_base.build_codegen_variable('local_mesh',const=const), global_mesh = self.mesh_base.build_codegen_variable('global_mesh',const=const), - # direction = MeshDirection.variable('direction',tg), - # state = TranspositionState.variable('state',tg) ) return super(MeshInfoStruct,self).build_codegen_variable(name=name,var_overrides=var_overrides,**kargs) diff --git a/hysop/backend/opencl/cltypes.py b/hysop/backend/opencl/cltypes.py deleted file mode 100644 index cbb8640a2..000000000 --- a/hysop/backend/opencl/cltypes.py +++ /dev/null @@ -1,333 +0,0 @@ - -import string - -from hysop import __KERNEL_DEBUG__ -from hysop.constants import np, it -from hysop.backend.opencl import cl, clArray -from hysop.tools.numerics import MPZ, MPZ, MPFR, F2Q - -vsizes = [1,2,3,4,8,16] -base_types = ['float','signed','unsigned'] -float_base_types = ['half','float','double'] -signed_base_types = ['char','short','int','long'] -unsigned_base_types = ['uchar','ushort','uint','ulong'] - -float_types = [] -signed_types = [] -unsigned_types = [] -for b in base_types: - b_base_types = eval(b+'_base_types') - b_types = eval(b+'_types') - for f,c in it.product(b_base_types,vsizes): - if c==1: - if f=='half': continue - else: ftype = f - else: - ftype = f+str(c) - b_types.append(ftype) -integer_types = signed_types + unsigned_types -builtin_types = integer_types + float_types - - -float_base_type_require = { - 'half' : 'cl_khr_fp16', - 'float' : None, - 'double': 'cl_khr_fp64' -} - -FLT_DIG = { - 'half' : 3, # = HALF_DIG - 'float' : 6, # = FLT_DIG - 'double': 15 # = DBL_DIG -} -FLT_MANT_DIG = { - 'half' : 11, # = HALF_MANT_DIG - 'float' : 24, # = FLT_MANT_DIG - 'double': 53 # = DBL_MANT_DIG -} -FLT_LITERAL = { - 'half' : 'h', - 'float' : 'f', - 'double': '' -} -FLT_BYTES = { - 'half' : 2, - 'float' : 4, - 'double': 8 -} - -def basetype(fulltype): - return fulltype.translate(None,string.digits) -def components(fulltype): - comp = fulltype.translate(None,string.ascii_letters) - return 1 if comp == '' else int(comp) -def mangle_vtype(fulltype): - return basetype(fulltype)[0]+str(components(fulltype)) - -def vtype(basetype,N): - return basetype + ('' if N==1 else str(N)) -def itype(fulltype): - N = components(fulltype) - return 'int' + ('' if N==1 else str(N)) -def uitype(fulltype): - N = components(fulltype) - return 'uint' + ('' if N==1 else str(N)) -def np_dtype(fulltype): - return cl.tools.get_or_register_dtype(fulltype) - -def vtype_component_adressing(i,mode='hex'): - if mode=='hex': return '0123456789abcdef'[i] - elif mode=='HEX': return '0123456789ABCDEF'[i] - elif mode=='pos': return 'xyzw'[i] - else: raise ValueError('Bad vtype component adressing mode!') - -def vtype_access(i,N,mode='hex'): - assert i<N - if N==1: return '' - else: return ('s' if mode.lower()=='hex' else '') + vtype_component_adressing(i,mode) - -def float_to_hex_str(f,fbtype): - sf = float(f).hex().split('0x') + [''] - buf = sf[1].split('p') - - mantissa = buf[0] - exponent = buf[1] - - mant_dig = FLT_MANT_DIG[fbtype] - literal = FLT_LITERAL [fbtype] - - nhex = (mant_dig-1+3)//4 + 2 - # +2= leading one or zero and decimal point characters (1.abde... or 0.abcde...) - - sf[0] = ('+' if sf[0] == '' else sf[0])+'0x' - sf[1] = mantissa[:nhex] - sf[2] = 'p'+exponent+literal - return ''.join(sf) - -def float_to_dec_str(f,fbtype): - sf = float(f).__repr__().split('.') - offset = (1 if sf[0][0] in ['-','+'] else 0) - sf[1] = sf[1][:FLT_DIG[fbtype]-len(sf[0])+offset+1] - return ('+' if f>0 else '') + '.'.join(sf)+FLT_LITERAL[fbtype] - - - -#pyopencl specific -vec = clArray.vec - -def npmake(dtype): - return lambda scalar: np.array([scalar], dtype=dtype) - -vtype_int = [np.int32, vec.int2, vec.int3, vec.int4, vec.int8, vec.int16 ] -vtype_uint = [np.uint32, vec.uint2, vec.uint3, vec.uint4, vec.uint8, vec.uint16 ] -vtype_simple = [np.float32, vec.float2, vec.float3, vec.float4, vec.float8, vec.float16 ] -vtype_double = [np.float64, vec.double2, vec.double3, vec.double4, vec.double8, vec.double16 ] - -make_int = [npmake(np.int32), vec.make_int2, vec.make_int3, - vec.make_int4, vec.make_int8, - vec.make_int16 ] -make_uint = [npmake(np.uint32), vec.make_uint2, vec.make_uint3, - vec.make_uint4, vec.make_uint8, - vec.make_uint16 ] -make_simple = [npmake(np.float32), vec.make_float2, vec.make_float3, - vec.make_float4, vec.make_float8, - vec.make_float16 ] -make_double = [npmake(np.float64), vec.make_double2, vec.make_double3, - vec.make_double4, vec.make_double8, - vec.make_double16 ] - -def simplen(n): - if n==1: return np.float32 - i = vsizes.index(n) - return vtype_simple[i] -def doublen(n): - if n==1: return np.float64 - i = vsizes.index(n) - return vtype_double[i] -def intn(n): - if n==1: return np.int32 - i = vsizes.index(n) - return vtype_int[i] -def uintn(n): - if n==1: return np.uint32 - i = vsizes.index(n) - return vtype_uint[i] - -_typen = { - 'float' : simplen, - 'simple': simplen, - 'double': doublen, - 'int' : intn, - 'uint' : uintn -} - - -def typen(btype,n): - return _typen[btype](n) - -def cl_type_to_dtype(cl_type): - btype = basetype(cl_type) - N = components(cl_type) - return typen(btype,N) - -def make_simplen(vals,n,dval=0): - vals = (vals,) if np.isscalar(vals) else tuple(vals) - vals += (dval,)*(n-len(vals)) - i = vsizes.index(n) - return make_simple[i](*vals) -def make_doublen(vals,n,dval=0): - vals = (vals,) if np.isscalar(vals) else tuple(vals) - vals += (dval,)*(n-len(vals)) - i = vsizes.index(n) - return make_double[i](*vals) -def make_intn(vals,n,dval=0): - vals = (vals,) if np.isscalar(vals) else tuple(vals) - vals += (dval,)*(n-len(vals)) - i = vsizes.index(n) - return make_int[i](*vals) -def make_uintn(vals,n,dval=0): - vals = (vals,) if np.isscalar(vals) else tuple(vals) - vals += (dval,)*(n-len(vals)) - i = vsizes.index(n) - return make_uint[i](*vals) - - -class TypeGen(object): - def __init__(self, fbtype='float', float_dump_mode='dec'): - - self.float_base_types = float_base_types - self.FLT_BYTES = FLT_BYTES - self.FLT_DIG = FLT_DIG - self.FLT_MANT_DIG = FLT_MANT_DIG - self.FLT_LITERAL = FLT_LITERAL - - self.np_dtype = np_dtype - - self.float_to_dec_str = float_to_dec_str - self.float_to_hex_str = float_to_hex_str - - self.fbtype = fbtype - - self.float_dump_mode = float_dump_mode - if float_dump_mode in ['hex', 'hexadecimal']: - self.float_to_str = float_to_hex_str - elif float_dump_mode in ['dec','decimal']: - self.float_to_str = float_to_dec_str - else: - raise ValueError('Unknown float_dump_mode \'{}\''.format(float_dump_mode)) - - def dump(self, val): - if val.__class__ in [list,tuple,dict,np.ndarray]: - raise ValueError('Value is not a scalar, got {}.'.format(val)) - if val.__class__ in [float,np.float16,np.float32,np.float64]: - sval = self.float_to_str(val, self.fbtype) - return '({})'.format(sval) - elif val.__class__ in [int,np.int8,np.int16,np.int32,np.int64,MPZ]: - sign = ('' if val==0 else ('+' if val>0 else '-')) - sval = str(val) - if val<0: - sval=sval[1:] - if val!=0: - sval = '({}{})'.format(sign,sval) - else: - sval = '0' - return sval - elif val.__class__ in [bool,np.bool_]: - return 'true' if val else 'false' - elif isinstance(val, MPQ): - if __KERNEL_DEBUG__: - return '({}.0{f}/{}.0{f})'.format(val.numerator,val.denominator, - f=FLT_LITERAL[self.fbtype]) - else: - return self.dump(float(val)) - else: - return val.__str__() - - -# struct type generation (type size and struct field offsets) is different for each device -# depending on architecture and compiler implementation and features. -# /!\ do not use the same opencl typegen instance for two different devices that are -# not equivalent. -class OpenClTypeGen(TypeGen): - @staticmethod - def devicelessTypegen(): - """ - Sometimes we do not need structs and code generation is device independent. - """ - return OpenClTypeGen(device=None,context=None,platform=None); - - def __init__(self, device, context, platform, - fbtype='float', float_dump_mode='dec'): - super(OpenClTypeGen,self).__init__(fbtype,float_dump_mode) - - self.device = device - self.context = context - self.platform = platform - - self.vsizes = vsizes - self.signed_base_types = signed_base_types - self.unsigned_base_types = unsigned_base_types - self.integer_base_types = signed_base_types + unsigned_base_types - - self.float_types = float_types - self.signed_types = signed_types - self.unsigned_types = unsigned_types - self.integer_types = integer_types - self.builtin_types = builtin_types - - self.float_base_type_require = float_base_type_require - - self.basetype = basetype - self.components = components - self.vtype = vtype - self.itype = itype - self.uitype = uitype - self.np_dtype = np_dtype - - self.vtype_component_adressing = vtype_component_adressing - self.vtype_access = vtype_access - self.mangle_vtype = mangle_vtype - self.float_to_dec_str = float_to_dec_str - self.float_to_hex_str = float_to_hex_str - - #pyopencl specifics - self.intn = intn - self.uintn = uintn - self.simplen = simplen - self.doublen = doublen - self.typen = typen - - self.make_intn = make_intn - self.make_uintn = make_uintn - self.make_simplen = make_simplen - self.make_doublen = make_doublen - - if fbtype == 'float': - self.floatn = simplen - self.make_floatn = make_simplen - elif fbtype == 'double': - self.floatn = doublen - self.make_floatn = make_doublen - # elif fbtype == 'half': - # self.floatn = halfn - # self.make_floatn = make_halfn - else: - raise ValueError('Unknown fbtype \'{}\''.format(fbtype)) - - def device_has_ftype(self,device): - dev_exts = device.extensions.split(' ') - req = self.float_base_type_require[self.fbtype] - return (req is None) or (req[0] in dev_exts) - def cl_requirements(self): - return [self.float_base_type_require[self.fbtype]]; - - def dtype_from_str(self,stype): - stype = stype.replace('ftype', self.fbtype).replace('fbtype',self.fbtype) - btype = basetype(stype) - N = components(stype) - return typen(btype,N) - - def __repr__(self): - return '{}_{}_{}_{}'.format(self.platform.name,self.device.name, - self.fbtype,self.float_dump_mode) - diff --git a/hysop/backend/opencl/discrete.py b/hysop/backend/opencl/discrete.py index b58f92b09..a60a54135 100644 --- a/hysop/backend/opencl/discrete.py +++ b/hysop/backend/opencl/discrete.py @@ -4,7 +4,7 @@ from hysop import __VERBOSE__ from hysop.constants import HYSOP_ORDER, HYSOP_REAL, DirectionLabels from hysop.fields.discrete import DiscreteField from hysop.backend.opencl import cl -from hysop.backend.opencl.gpu_kernel import KernelLauncher, KernelListLauncher +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher, KernelListLauncher from hysop.backend.arrays.numpy_backend import NumpyBackend as npw from hysop.backend.arrays.numpy_backend import OpenClBackend as npcl @@ -30,7 +30,7 @@ class OpenClDiscreteField(DiscreteField): Field name """ # init base class - super(GPUDiscreteField, self).__init__(topology, is_vector, name) + super(OpenClDiscreteField, self).__init__(topology, is_vector, name) # OpenCL environment self.cl_env = cl_env @@ -54,9 +54,9 @@ class OpenClDiscreteField(DiscreteField): @classmethod def from_field(cls, cl_env, vfield): - if not isinstance(vfield, GPUDiscreteField): + if not isinstance(vfield, OpenClDiscreteField): vfield.__class__ = cls - GPUDiscreteField.__init__( + OpenClDiscreteField.__init__( vfield, cl_env, vfield.topology, vfield.nb_components > 1, vfield.name) diff --git a/hysop/backend/opencl/opencl_discrete.py b/hysop/backend/opencl/opencl_discrete.py index 9f0b63df2..639366bb4 100644 --- a/hysop/backend/opencl/opencl_discrete.py +++ b/hysop/backend/opencl/opencl_discrete.py @@ -48,7 +48,7 @@ class OpenClDiscreteField(DiscreteField): self.precision = precision # Memory used self.mem_size = 0 - ## Initialization OpenCL kernel as KernelLauncher + ## Initialization OpenCL kernel as OpenClKernelLauncher self.init_kernel = None self._isReleased = False ## OpenCL Buffer pointer @@ -157,7 +157,7 @@ class OpenClDiscreteField(DiscreteField): def setInitializationKernel(self, kernel): """ Set the initialization kernel - @param kernel : KernelLauncher to use for initialize field. + @param kernel : OpenClKernelLauncher to use for initialize field. """ self.init_kernel = kernel @@ -194,7 +194,7 @@ class OpenClDiscreteField(DiscreteField): t = self.precision(time) if __VERBOSE__: print "{" + str(self._rank) + "}", "Initialize", self.name - isGPUKernel = isinstance(formula, KernelLauncher) \ + isGPUKernel = isinstance(formula, OpenClKernelLauncher) \ or isinstance(formula, KernelListLauncher) if not isGPUKernel and self.init_kernel is None: DiscreteField.initialize(self, formula, False, time, *args) diff --git a/hysop/backend/opencl/clenv.py b/hysop/backend/opencl/opencl_env.py similarity index 99% rename from hysop/backend/opencl/clenv.py rename to hysop/backend/opencl/opencl_env.py index ce6f84f3e..cc6510085 100644 --- a/hysop/backend/opencl/clenv.py +++ b/hysop/backend/opencl/opencl_env.py @@ -9,10 +9,10 @@ from hysop.backend.opencl import cl, clTools, __OPENCL_PROFILE__ from hysop.backend.opencl.tools import convert_device_type, convert_precision from hysop.backend.opencl.tools import get_platform, get_context, get_device, \ create_queue, parse_opencl_file -from hysop.backend.opencl.cltypes import OpenClTypeGen +from hysop.backend.opencl.opencl_types import OpenClTypeGen -class OpenCLEnvironment(object): +class OpenClEnvironment(object): """OpenCL environment informations and useful functions. """ def __init__(self, platform_id = __DEFAULT_PLATFORM_ID__, diff --git a/hysop/backend/opencl/tools.py b/hysop/backend/opencl/tools.py index 6f0161795..470ea2621 100644 --- a/hysop/backend/opencl/tools.py +++ b/hysop/backend/opencl/tools.py @@ -1,6 +1,6 @@ """Classes and tools used to handle the OpenCL backend. -* :class:`~hysop.gpu.tools.OpenCLEnvironment`: +* :class:`~hysop.gpu.tools.OpenClEnvironment`: object handling opencl platform, device ... info. * :func:`~hysop.gpu.tools.get_opengl_shared_environment`: build or get an OpenCL environment with openGL properties. @@ -129,17 +129,17 @@ def get_or_create_opencl_env( precision = Precision.DEFAULT_PRECISION, gl_sharing=False, comm=None): """ - Create or an OpenCLEnvironment from given parameters if it does not already exists. + Create or an OpenClEnvironment from given parameters if it does not already exists. All environements are kept alive (cached) in a dictionary local to this - function (ie. all opencl operators can share the same OpenCLEnvironment). + function (ie. all opencl operators can share the same OpenClEnvironment). """ key = (platform_id,device_id, device_type, gl_sharing, comm) if key in get_or_create_opencl_env.opencl_environments: return get_or_create_opencl_env.opencl_environments[key] - from hysop.backend.opencl.clenv import OpenCLEnvironment - env = OpenCLEnvironment(platform_id=platform_id, device_id=device_id, + from hysop.backend.opencl.opencl_env import OpenClEnvironment + env = OpenClEnvironment(platform_id=platform_id, device_id=device_id, device_type=device_type, gl_sharing=gl_sharing, comm=comm) get_or_create_opencl_env.opencl_environments[key] = env diff --git a/hysop/deps.py b/hysop/deps.py index fe6938772..29dd22101 100644 --- a/hysop/deps.py +++ b/hysop/deps.py @@ -25,7 +25,7 @@ from abc import ABCMeta, abstractmethod import sys, os, subprocess, platform import inspect, functools, operator import hashlib, gzip, copy, types -import math, re +import math, re, contextlib import itertools as it import numpy as np diff --git a/hysop/numerics/fftw_f/fft3d.f90 b/hysop/numerics/fftw_f/fft3d.f90 index 23580aca7..b3da0f41d 100755 --- a/hysop/numerics/fftw_f/fft3d.f90 +++ b/hysop/numerics/fftw_f/fft3d.f90 @@ -494,7 +494,7 @@ contains call c_f_pointer(cbuffer1, rdatain_many, [howmany,2*halfLength,fft_resolution(c_Y),local_resolution(c_Z)]) call c_f_pointer(cbuffer1, dataout_many, [howmany,halfLength, fft_resolution(c_Z), local_resolution(c_Y)]) - ! create MPI plans for in-place forward/backward DFT (note dimension reversal) + ! create MPI plans for in-place forward/backward DFT (note dimension reversal) n(3) = fft_resolution(c_X) plan_forward1 = fftw_mpi_plan_many_dft_r2c(3,n,howmany,blocksize,blocksize, rdatain_many, dataout_many, & diff --git a/hysop/numerics/stencil/stencil.py b/hysop/numerics/stencil/stencil.py index f1844bcc7..95af6627d 100644 --- a/hysop/numerics/stencil/stencil.py +++ b/hysop/numerics/stencil/stencil.py @@ -8,8 +8,8 @@ """ -from hysop.deps import sm, sp, it, np -from hysop.tools.sympy_utils import recurse_expression_tree +from hysop.deps import sm, sp, it, np, hashlib +from hysop.tools.sympy_utils import recurse_expression_tree, expr2str class Stencil(object): """ @@ -73,31 +73,42 @@ class Stencil(object): :class:`StencilGenerator`: Generate Stencil objects. """ + coeffs = np.atleast_1d(coeffs) + origin = np.atleast_1d(origin) + order = np.atleast_1d(order) + dx = np.atleast_1d(dx) + if (origin<0).any(): raise ValueError('Origin component < 0!\norigin={}'.format(origin)) - if coeffs.ndim==1: - origin = origin if np.isscalar(origin) else origin[0] - order = order if np.isscalar(order) else order[0] - dx = dx if isinstance(dx,sm.Symbol) else dx[0] - else: - coeffs = np.asarray(coeffs) - origin = np.asarray(origin) - order = np.asarray(order) - - if isinstance(dx, list): - dx = np.asarray(dx) - - self.dx = dx + self.dx = dx[0] if dx.size==1 else dx self.error = error - self.origin = origin - self.order = order + self.origin = origin[0] if origin.size==1 else origin + self.order = order[0] if order.size==1 else order self.factor = factor self.coeffs = self._delete_zeros(coeffs) self._update_attributes() + def format_factor(self, svars): + return expr2str(self.factor,svars) + + def has_factor(self): + return (self.factor!=1) + + def non_zero_coefficients(self): + return np.sum(self.coeffs!=0) + + def replace_symbols(self, dic): + if isinstance(self.factor,sm.Basic): + self.factor = self.factor.xreplace(dic) + coeffs = self.coeffs.ravel() + for i,coeff in enumerate(coeffs): + if isinstance(coeff,sm.Basic): + coeffs[i] = coeff.xreplace(dic) + + def _update_attributes(self): self.dim = self.coeffs.ndim self.shape = self.coeffs.shape @@ -201,11 +212,10 @@ class Stencil(object): Zipped offset and coefficient iterator. """ factor = self.factor if include_factor else 1 + svars = dict(zip(svars.keys(),[str(v) for v in svars.values()])) def mapfun(x): offset = x-self.origin - value = factor*self.coeffs[x] - if isinstance(value, sm.Expr): - value = value.xreplace(svars) + value = expr2str(factor*self.coeffs[x],svars) return (offset,value) iterator = np.ndindex(self.shape) iterator = it.imap(mapfun, iterator) diff --git a/hysop/old/gpu.old/QtRendering.py b/hysop/old/gpu.old/QtRendering.py index fa769dcab..3092f4b52 100644 --- a/hysop/old/gpu.old/QtRendering.py +++ b/hysop/old/gpu.old/QtRendering.py @@ -10,8 +10,8 @@ from PyQt4.QtOpenGL import QGLWidget import OpenGL.GL as gl from hysop.backend.opencl.tools import get_opengl_shared_environment from hysop.backend.opencl import cl -from hysop.backend.opencl.gpu_discrete import GPUDiscreteField -from hysop.backend.opencl.gpu_kernel import KernelLauncher +from hysop.backend.opencl.opencl_discrete import OpenClDiscreteField +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher from hysop.core.mpi import main_rank from hysop.operator.computational import Computational from hysop.tools.numpywrappers import npw @@ -74,7 +74,7 @@ class QtOpenGLRendering(Computational): """ ## GPU scalar field for df in self.variables[0].discrete_fields.values(): - if isinstance(df, GPUDiscreteField): + if isinstance(df, OpenClDiscreteField): self.gpu_field = df # Create OpenGL VBOs ## VBO for coordinates @@ -138,11 +138,11 @@ class QtOpenGLRendering(Computational): gwi = self.gpu_field.data[self.component].shape else: gwi = self.gpu_field.data[0].shape - self.initCoordinates = KernelLauncher( + self.initCoordinates = OpenClKernelLauncher( self.prg.initPointCoordinates, self.window.widget.cl_env.queue, gwi, None) ## OpenCL kernel for computing colors - self.numMethod = KernelLauncher( + self.numMethod = OpenClKernelLauncher( self.prg.colorize, self.window.widget.cl_env.queue, gwi, None) diff --git a/hysop/old/gpu.old/gpu_diffusion.py b/hysop/old/gpu.old/gpu_diffusion.py index bc69254f9..b6fe69583 100644 --- a/hysop/old/gpu.old/gpu_diffusion.py +++ b/hysop/old/gpu.old/gpu_diffusion.py @@ -10,8 +10,8 @@ from hysop.operator.discrete.discrete import DiscreteOperator from hysop.operator.discrete.discrete import get_extra_args_from_method from hysop.backend.opencl import cl from hysop.backend.opencl.gpu_operator import GPUOperator -from hysop.backend.opencl.gpu_kernel import KernelLauncher -from hysop.backend.opencl.gpu_discrete import GPUDiscreteField +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher +from hysop.backend.opencl.opencl_discrete import OpenClDiscreteField from hysop.tools.profiler import FProfiler from hysop.core.mpi import Wtime @@ -38,8 +38,8 @@ class GPUDiffusion(DiscreteOperator, GPUOperator): **kwds) ## GPU allocation. - alloc = not isinstance(self.field, GPUDiscreteField) - GPUDiscreteField.fromField(self.cl_env, self.field, + alloc = not isinstance(self.field, OpenClDiscreteField) + OpenClDiscreteField.fromField(self.cl_env, self.field, self.gpu_precision, simple_layout=False) if not self.field.gpu_allocated: self.field.allocate() @@ -159,9 +159,9 @@ class GPUDiffusion(DiscreteOperator, GPUOperator): build_options += " -D NB_GROUPS_I={0}".format(blocs_nb[0]) build_options += " -D NB_GROUPS_II={0}".format(blocs_nb[1]) prg = self.cl_env.build_src(src, build_options, vec) - self.num_diffusion = KernelLauncher( + self.num_diffusion = OpenClKernelLauncher( prg.diffusion, self.cl_env.queue, gwi, lwi) - self.copy = KernelLauncher(cl.enqueue_copy, + self.copy = OpenClKernelLauncher(cl.enqueue_copy, self.cl_env.queue) def _compute_diffusion(self, simulation): diff --git a/hysop/old/gpu.old/gpu_discrete.py b/hysop/old/gpu.old/gpu_discrete.py index 0b5e18343..c38245cb2 100644 --- a/hysop/old/gpu.old/gpu_discrete.py +++ b/hysop/old/gpu.old/gpu_discrete.py @@ -5,7 +5,7 @@ from hysop.constants import ORDER, np,\ debug, HYSOP_REAL, DirectionLabels from hysop.fields.discrete import DiscreteField from hysop.backend.opencl import cl, CL_PROFILE -from hysop.backend.opencl.gpu_kernel import KernelLauncher, KernelListLauncher +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher, KernelListLauncher from hysop.tools.profiler import FProfiler fromLayoutMgrFunc_3D_seq = [ @@ -65,7 +65,7 @@ toLayoutMgrFunc_1D = [ ] -class GPUDiscreteField(DiscreteField): +class OpenClDiscreteField(DiscreteField): """GPU Discrete vector field implementation. Allocates OpenCL device memory for the field. """ @@ -94,14 +94,14 @@ class GPUDiscreteField(DiscreteField): indicates if in the Z direction, layout is ZYX (simple) or ZXY. """ # init base class - super(GPUDiscreteField, self).__init__(topology, is_vector, name) + super(OpenClDiscreteField, self).__init__(topology, is_vector, name) # OpenCL environment self.cl_env = cl_env # Precision for the field self.precision = precision # Memory used self.mem_size = 0 - ## Initialization OpenCL kernel as KernelLauncher + ## Initialization OpenCL kernel as OpenClKernelLauncher self.init_kernel = None self._isReleased = False ## OpenCL Buffer pointer @@ -200,9 +200,9 @@ class GPUDiscreteField(DiscreteField): @param layout : Boolean indicating if components are arranged in memory @param simple_layout : Boolean indicating if in the Z direction, """ - if not isinstance(vfield, GPUDiscreteField): + if not isinstance(vfield, OpenClDiscreteField): vfield.__class__ = cls - GPUDiscreteField.__init__( + OpenClDiscreteField.__init__( vfield, cl_env, vfield.topology, vfield.nb_components > 1, vfield.name, precision, layout, simple_layout) @@ -210,7 +210,7 @@ class GPUDiscreteField(DiscreteField): def setInitializationKernel(self, kernel): """ Set the initialization kernel - @param kernel : KernelLauncher to use for initialize field. + @param kernel : OpenClKernelLauncher to use for initialize field. """ self.init_kernel = kernel @@ -247,7 +247,7 @@ class GPUDiscreteField(DiscreteField): t = self.precision(time) if __VERBOSE__: print "{" + str(self._rank) + "}", "Initialize", self.name - isGPUKernel = isinstance(formula, KernelLauncher) \ + isGPUKernel = isinstance(formula, OpenClKernelLauncher) \ or isinstance(formula, KernelListLauncher) if not isGPUKernel and self.init_kernel is None: DiscreteField.initialize(self, formula, False, time, *args) diff --git a/hysop/old/gpu.old/gpu_kernel.py b/hysop/old/gpu.old/gpu_kernel.py index 53526a467..bcbd43030 100644 --- a/hysop/old/gpu.old/gpu_kernel.py +++ b/hysop/old/gpu.old/gpu_kernel.py @@ -123,7 +123,7 @@ class KernelListLauncher(object): for d in xrange(len(self.kernel))] -class KernelLauncher(KernelListLauncher): +class OpenClKernelLauncher(KernelListLauncher): """ OpenCL kernel launcher. @@ -133,7 +133,7 @@ class KernelLauncher(KernelListLauncher): @debug def __init__(self, kernel, queue, gsize=None, lsize=None): """ - Create a KernelLauncher. + Create a OpenClKernelLauncher. Create a KernelListLauncher with a list of one kernel. diff --git a/hysop/old/gpu.old/gpu_multiphase_baroclinic_rhs.py b/hysop/old/gpu.old/gpu_multiphase_baroclinic_rhs.py index 0b2142ce7..84e970eb8 100644 --- a/hysop/old/gpu.old/gpu_multiphase_baroclinic_rhs.py +++ b/hysop/old/gpu.old/gpu_multiphase_baroclinic_rhs.py @@ -10,8 +10,8 @@ from hysop.operator.discrete.discrete import DiscreteOperator from hysop.operator.discrete.discrete import get_extra_args_from_method from hysop.backend.opencl import cl from hysop.backend.opencl.gpu_operator import GPUOperator -from hysop.backend.opencl.gpu_kernel import KernelListLauncher -from hysop.backend.opencl.gpu_discrete import GPUDiscreteField +from hysop.backend.opencl.opencl_kernel import KernelListLauncher +from hysop.backend.opencl.opencl_discrete import OpenClDiscreteField from hysop.tools.profiler import FProfiler from hysop.core.mpi import Wtime from hysop.methods import SpaceDiscretisation @@ -74,8 +74,8 @@ class BaroclinicRHS(DiscreteOperator, GPUOperator): # GPU allocation. for field in self.variables: - alloc = not isinstance(field, GPUDiscreteField) - GPUDiscreteField.fromField(self.cl_env, field, + alloc = not isinstance(field, OpenClDiscreteField) + OpenClDiscreteField.fromField(self.cl_env, field, self.gpu_precision, layout=False) if not field.gpu_allocated: field.allocate() diff --git a/hysop/old/gpu.old/gpu_multiresolution_filter.py b/hysop/old/gpu.old/gpu_multiresolution_filter.py index cabfa87ff..9d57dce43 100644 --- a/hysop/old/gpu.old/gpu_multiresolution_filter.py +++ b/hysop/old/gpu.old/gpu_multiresolution_filter.py @@ -8,8 +8,8 @@ from hysop.tools.numpywrappers import npw from hysop.operator.discrete.multiresolution_filter import FilterFineToCoarse from hysop.backend.opencl.gpu_operator import GPUOperator from hysop.operator.discrete.discrete import get_extra_args_from_method -from hysop.backend.opencl.gpu_discrete import GPUDiscreteField -from hysop.backend.opencl.gpu_kernel import KernelLauncher +from hysop.backend.opencl.opencl_discrete import OpenClDiscreteField +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher from hysop.methods import Remesh @@ -38,16 +38,16 @@ class GPUFilterFineToCoarse(FilterFineToCoarse, GPUOperator): **kwds) #GPU allocations - alloc = not isinstance(self.field_in[0], GPUDiscreteField) - GPUDiscreteField.fromField(self.cl_env, self.field_in[0], + alloc = not isinstance(self.field_in[0], OpenClDiscreteField) + OpenClDiscreteField.fromField(self.cl_env, self.field_in[0], self.gpu_precision, layout=False) if not self.field_in[0].gpu_allocated: self.field_in[0].allocate() if alloc: self.size_global_alloc += self.field_in[0].mem_size - alloc = not isinstance(self.field_out[0], GPUDiscreteField) - GPUDiscreteField.fromField(self.cl_env, self.field_out[0], + alloc = not isinstance(self.field_out[0], OpenClDiscreteField) + OpenClDiscreteField.fromField(self.cl_env, self.field_out[0], self.gpu_precision, layout=False) if not self.field_out[0].gpu_allocated: self.field_out[0].allocate() @@ -122,9 +122,9 @@ class GPUFilterFineToCoarse(FilterFineToCoarse, GPUOperator): build_options += " -D WG=" + str(lwi[0]) build_options += " -D FORMULA=" + self.method[Remesh].__name__.upper() prg = self.cl_env.build_src(src, build_options, vec) - self.fine_to_coarse = KernelLauncher( + self.fine_to_coarse = OpenClKernelLauncher( prg.coarse_to_fine_filter, self.cl_env.queue, gwi, lwi) - self.initialize = KernelLauncher( + self.initialize = OpenClKernelLauncher( prg.initialize_output, self.cl_env.queue, self.field_out[0].data[0].shape, None) self._evts = [None, ] * self.field_in[0].dimension diff --git a/hysop/old/gpu.old/gpu_particle_advection.py b/hysop/old/gpu.old/gpu_particle_advection.py index 9a132ce71..305e2667b 100644 --- a/hysop/old/gpu.old/gpu_particle_advection.py +++ b/hysop/old/gpu.old/gpu_particle_advection.py @@ -9,10 +9,10 @@ from hysop.numerics.odesolvers import Euler from hysop.operator.discrete.particle_advection import ParticleAdvection from hysop.operator.discrete.discrete import get_extra_args_from_method from hysop.backend.opencl import cl -from hysop.backend.opencl.gpu_kernel import KernelLauncher +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher import hysop.default_methods as default from hysop.tools.numpywrappers import npw -from hysop.backend.opencl.gpu_discrete import GPUDiscreteField +from hysop.backend.opencl.opencl_discrete import OpenClDiscreteField from hysop.backend.opencl.gpu_operator import GPUOperator from hysop.tools.profiler import profile from hysop.numerics.update_ghosts import UpdateGhostsFull @@ -284,15 +284,15 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): """Allocate OpenCL buffers for velocity and advected field. """ # Velocity. - alloc = not isinstance(self.velocity, GPUDiscreteField) - GPUDiscreteField.fromField(self.cl_env, self.velocity, + alloc = not isinstance(self.velocity, OpenClDiscreteField) + OpenClDiscreteField.fromField(self.cl_env, self.velocity, self.gpu_precision, simple_layout=False) if alloc: self.size_global_alloc += self.velocity.mem_size # Transported field. - alloc = not isinstance(self.fields_on_grid[0], GPUDiscreteField) - GPUDiscreteField.fromField(self.cl_env, + alloc = not isinstance(self.fields_on_grid[0], OpenClDiscreteField) + OpenClDiscreteField.fromField(self.cl_env, self.fields_on_grid[0], self.gpu_precision, layout=False) @@ -338,7 +338,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): else: workItemNumber, gwi, lwi = \ self.cl_env.get_work_items(self.resol_dir) - gpudf.setInitializationKernel(KernelLauncher( + gpudf.setInitializationKernel(OpenClKernelLauncher( cl.Kernel(self.prg, k_name), self.cl_env.queue, gwi, lwi)) @@ -360,9 +360,9 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): # src, # build_options, # vec) - # self.copy = KernelLauncher(prg.copy, + # self.copy = OpenClKernelLauncher(prg.copy, # self.cl_env.queue, gwi, lwi) - return KernelLauncher(cl.enqueue_copy, self.cl_env.queue) + return OpenClKernelLauncher(cl.enqueue_copy, self.cl_env.queue) def _collect_kernels_cl_src_transpositions_xy(self): """Compile OpenCL sources for transpositions kernel. @@ -418,7 +418,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): build_options += " -D NB_GROUPS_II={0}".format(blocs_nb[1]) build_options += ocl_cte prg = self.cl_env.build_src(src, build_options, vec) - return KernelLauncher(prg.transpose_xy, self.cl_env.queue, gwi, lwi) + return OpenClKernelLauncher(prg.transpose_xy, self.cl_env.queue, gwi, lwi) def _collect_kernels_cl_src_transpositions_xz(self): resol = self.fields_topo.mesh.resolution @@ -477,7 +477,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): src, build_options, vec) - return KernelLauncher(prg.transpose_xz, self.cl_env.queue, gwi, lwi) + return OpenClKernelLauncher(prg.transpose_xz, self.cl_env.queue, gwi, lwi) def _collect_usr_cl_src(self): """Build user sources. @@ -523,7 +523,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): src, build_options, vec, nb_remesh_components=self.fields_on_grid[0].nb_components) - self.num_advec_and_remesh = KernelLauncher( + self.num_advec_and_remesh = OpenClKernelLauncher( prg.advection_and_remeshing, self.cl_env.queue, gwi, lwi) def _collect_kernels_cl_src_2k(self): @@ -565,7 +565,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): vec, nb_remesh_components=self.fields_on_grid[0].nb_components) - self.num_advec = KernelLauncher( + self.num_advec = OpenClKernelLauncher( prg.advection_kernel, self.cl_env.queue, gwi, lwi) # remeshing @@ -584,7 +584,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): prg = self.cl_env.build_src( src, build_options, vec, nb_remesh_components=self.fields_on_grid[0].nb_components) - self.num_remesh = KernelLauncher( + self.num_remesh = OpenClKernelLauncher( prg.remeshing_kernel, self.cl_env.queue, gwi, lwi) @debug diff --git a/hysop/old/gpu.old/gpu_particle_advection_dir.py b/hysop/old/gpu.old/gpu_particle_advection_dir.py index e3ab7ee88..abbb887a0 100644 --- a/hysop/old/gpu.old/gpu_particle_advection_dir.py +++ b/hysop/old/gpu.old/gpu_particle_advection_dir.py @@ -9,9 +9,9 @@ from hysop.operator.discrete.particle_advection_dir import ParticleAdvectionDir from hysop.numerics.update_ghosts import UpdateGhostsFull from hysop.backend.opencl import cl -from hysop.backend.opencl.gpu_discrete import GPUDiscreteField +from hysop.backend.opencl.opencl_discrete import OpenClDiscreteField from hysop.backend.opencl.gpu_operator import GPUOperator -from hysop.backend.opencl.gpu_kernel import KernelLauncher +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher from hysop.backend.opencl.kernel_autotuner import AutotunerConfig from hysop.backend.codegen.structs.mesh_info import MeshInfoStruct, TranspositionState, MeshDirection @@ -262,7 +262,7 @@ class GPUParticleAdvectionDir(ParticleAdvectionDir, GPUOperator): self.v_resol_dir = v_resol_dir def _collect_copy_kernels(self): - self.copy = KernelLauncher(cl.enqueue_copy, + self.copy = OpenClKernelLauncher(cl.enqueue_copy, self.cl_env.queue) def _collect_transposition_kernels(self): @@ -384,14 +384,14 @@ class GPUParticleAdvectionDir(ParticleAdvectionDir, GPUOperator): raise RuntimeError('_set_work_arrays has not been implemented properly.') # Velocity - GPUDiscreteField.fromField(self.cl_env, self.velocity, + OpenClDiscreteField.fromField(self.cl_env, self.velocity, self.gpu_precision, simple_layout=False) if self.velocity.allocate(): self.size_global_alloc += self.velocity.mem_size # Fields on grids for fg in self.fields_on_grid: - GPUDiscreteField.fromField(self.cl_env, + OpenClDiscreteField.fromField(self.cl_env, fg, self.gpu_precision, layout=False) @@ -448,7 +448,7 @@ class GPUParticleAdvectionDir(ParticleAdvectionDir, GPUOperator): init_field_kernel = cl.Kernel(self.prg, k_name) gpudf.setInitializationKernel( - KernelLauncher(init_field_kernel,self.cl_env.queue,gwi,lwi) + OpenClKernelLauncher(init_field_kernel,self.cl_env.queue,gwi,lwi) ) diff --git a/hysop/old/gpu.old/gpu_stretching.py b/hysop/old/gpu.old/gpu_stretching.py index 1ae2c555f..8b7d191be 100644 --- a/hysop/old/gpu.old/gpu_stretching.py +++ b/hysop/old/gpu.old/gpu_stretching.py @@ -13,8 +13,8 @@ from hysop.operator.discrete.discrete import DiscreteOperator, get_extra_args_fr from hysop.core.mpi import Wtime from hysop.backend.opencl import cl from hysop.backend.opencl.gpu_operator import GPUOperator -from hysop.backend.opencl.gpu_kernel import KernelLauncher -from hysop.backend.opencl.gpu_discrete import GPUDiscreteField +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher +from hysop.backend.opencl.opencl_discrete import OpenClDiscreteField from hysop.tools.profiler import FProfiler from hysop.methods import TimeIntegrator, SpaceDiscretisation, Formulation, Support @@ -73,7 +73,7 @@ class GPUStretching(DiscreteOperator, GPUOperator): ## GPU allocations for field in variables: - GPUDiscreteField.fromField(self.cl_env, field, + OpenClDiscreteField.fromField(self.cl_env, field, self.gpu_precision, simple_layout=False) if field.allocate(): self.size_global_alloc += field.mem_size @@ -168,7 +168,7 @@ class GPUStretching(DiscreteOperator, GPUOperator): self.size_local_alloc += cached_bytes kernels = {} - kernels['stretching'] = KernelLauncher(kernel, cl_env.queue, gwi, lwi) + kernels['stretching'] = OpenClKernelLauncher(kernel, cl_env.queue, gwi, lwi) self.kernels = kernels def _gen_and_build_kernel(self, local_size, global_size, kernel_args, diff --git a/hysop/old/gpu.old/multi_gpu_particle_advection.py b/hysop/old/gpu.old/multi_gpu_particle_advection.py index fc8b24553..d3848365f 100644 --- a/hysop/old/gpu.old/multi_gpu_particle_advection.py +++ b/hysop/old/gpu.old/multi_gpu_particle_advection.py @@ -11,7 +11,7 @@ from hysop.operator.discrete.discrete import get_extra_args_from_method from hysop.methods import TimeIntegrator, MultiScale, Remesh from hysop.numerics.odesolvers import RK2 from hysop.numerics.remeshing import Linear as Linear_rmsh -from hysop.backend.opencl.gpu_kernel import KernelLauncher +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher from hysop.tools.profiler import FProfiler from hysop.backend.opencl import cl, CL_PROFILE from hysop.core.mpi import Wtime @@ -375,13 +375,13 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): build_options += " -D BUFF_WIDTH=" + str(self._s_buff_width) prg = self.cl_env.build_src( src, build_options, 1) - self.num_advec_and_remesh_comm_l = KernelLauncher( + self.num_advec_and_remesh_comm_l = OpenClKernelLauncher( prg.buff_advec_and_remesh_l, self.cl_env.queue, (gwi[1], gwi[2]), (32, 1)) - self.num_advec_and_remesh_comm_r = KernelLauncher( + self.num_advec_and_remesh_comm_r = OpenClKernelLauncher( prg.buff_advec_and_remesh_r, self.cl_env.queue, (gwi[1], gwi[2]), (32, 1)) - self.num_advec_and_remesh = KernelLauncher( + self.num_advec_and_remesh = OpenClKernelLauncher( prg.buff_advec_and_remesh, self.cl_env.queue, gwi, lwi) @@ -409,7 +409,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): build_options += " -D V_BUFF_WIDTH=" + str(self._v_buff_width) prg = self.cl_env.build_src( src, build_options, 1) - self.num_advec = KernelLauncher( + self.num_advec = OpenClKernelLauncher( prg.buff_advec, self.cl_env.queue, gwi, lwi) @@ -429,13 +429,13 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): build_options += " -D BUFF_WIDTH=" + str(self._s_buff_width) prg = self.cl_env.build_src( src, build_options, 1) - self.num_remesh_comm_l = KernelLauncher( + self.num_remesh_comm_l = OpenClKernelLauncher( prg.buff_remesh_l, self._queue_comm_m, (gwi[1], gwi[2]), (32, 1)) - self.num_remesh_comm_r = KernelLauncher( + self.num_remesh_comm_r = OpenClKernelLauncher( prg.buff_remesh_r, self._queue_comm_p, (gwi[1], gwi[2]), (32, 1)) - self.num_remesh = KernelLauncher( + self.num_remesh = OpenClKernelLauncher( prg.remesh, self.cl_env.queue, gwi, lwi) diff --git a/hysop/old/gpu.old/static_gpu_particle_advection_dir.py b/hysop/old/gpu.old/static_gpu_particle_advection_dir.py index 45c78e9a3..636b22dd5 100644 --- a/hysop/old/gpu.old/static_gpu_particle_advection_dir.py +++ b/hysop/old/gpu.old/static_gpu_particle_advection_dir.py @@ -2,7 +2,7 @@ from hysop import __VERBOSE__ from hysop.backend.opencl import cl -from hysop.backend.opencl.gpu_kernel import KernelLauncher +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher from hysop.backend.opencl.gpu_particle_advection_dir import GPUParticleAdvectionDir, MeshDirection from hysop.methods import TimeIntegrator, Remesh, ExtraArgs, \ @@ -89,8 +89,8 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir): if order<0: msg='Stretching order < 0' raise ValueError(msg) - if formulation not in StretchingFormulation.entries(): - msg='Stretching formulation is not one of {}.'.format(StretchingFormulation.fields().keys()) + if not isinstance(formulation,StretchingFormulation): + msg='Stretching formulation is not one of {}.'.format(StretchingFormulation.svalues()) raise ValueError(msg) def _initialize_cl_env(self): @@ -213,7 +213,7 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir): build_options += defines prg = self.cl_env.build_src(src, build_options, vec) - return KernelLauncher(prg.transpose_xy, self.cl_env.queue, gwi, lwi) + return OpenClKernelLauncher(prg.transpose_xy, self.cl_env.queue, gwi, lwi) def _build_transpose_kernel_xz(self, resolution, defines): build_options = self._build_options + self._size_constants @@ -237,7 +237,7 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir): build_options += defines prg = self.cl_env.build_src(src,build_options,vec) - return KernelLauncher(prg.transpose_xz, self.cl_env.queue, gwi, lwi) + return OpenClKernelLauncher(prg.transpose_xz, self.cl_env.queue, gwi, lwi) # def _collect_advec_kernel(self): # """ @@ -280,7 +280,7 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir): # nb_remesh_components=self.velocity.nb_components) # callback_profiler.register_tasks('advection') - # self._advec = KernelLauncher( + # self._advec = OpenClKernelLauncher( # prg.advection_kernel, self.cl_env.queue, gwi, lwi) def _collect_remesh_kernel(self): @@ -311,7 +311,7 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir): prg = self.cl_env.build_src( src, build_options, vec, nb_remesh_components=rc) - self._remesh[rc] = KernelLauncher( + self._remesh[rc] = OpenClKernelLauncher( prg.remeshing_kernel, self.cl_env.queue, gwi, lwi) cname = 'remesh' if len(self.required_components)==1 \ @@ -359,7 +359,7 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir): prg = self.cl_env.build_src( src, build_options, vec, nb_remesh_components=rc) - self._advec_and_remesh[rc] = KernelLauncher( + self._advec_and_remesh[rc] = OpenClKernelLauncher( prg.advection_and_remeshing, self.cl_env.queue, gwi, lwi) cname = 'advec_remesh' if len(self.required_components)==1 \ diff --git a/hysop/old/gpu.old/tests/test_copy.py b/hysop/old/gpu.old/tests/test_copy.py index 54f2381ea..a71bdd8e7 100644 --- a/hysop/old/gpu.old/tests/test_copy.py +++ b/hysop/old/gpu.old/tests/test_copy.py @@ -5,7 +5,7 @@ Testing copy kernels. from hysop.backend.opencl import cl from hysop.constants import np from hysop.backend.opencl.tools import get_opencl_environment -from hysop.backend.opencl.gpu_kernel import KernelLauncher +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher from hysop.tools.numpywrappers import npw @@ -22,7 +22,7 @@ def test_copy2D(): int(resolution[1] / 2)) lwi = (8, 8) prg = cl_env.build_src(src_copy, build_options, vec) - copy = KernelLauncher(prg.copy, cl_env.queue, gwi, lwi) + copy = OpenClKernelLauncher(prg.copy, cl_env.queue, gwi, lwi) data_in = npw.asrealarray(np.random.random(resolution)) data_out = npw.empty_like(data_in) @@ -59,7 +59,7 @@ def test_copy2D_rect(): int(resolution[1] / 2)) lwi = (8, 8) prg = cl_env.build_src(src_copy, build_options, vec) - copy_x = KernelLauncher(prg.copy, cl_env.queue, gwi, lwi) + copy_x = OpenClKernelLauncher(prg.copy, cl_env.queue, gwi, lwi) build_options = "" build_options += " -D NB_I=512 -D NB_II=256" @@ -69,7 +69,7 @@ def test_copy2D_rect(): int(resolution[0] / 2)) lwi = (8, 8) prg = cl_env.build_src(src_copy, build_options, vec) - copy_y = KernelLauncher(prg.copy, cl_env.queue, gwi, lwi) + copy_y = OpenClKernelLauncher(prg.copy, cl_env.queue, gwi, lwi) data_in = npw.asrealarray(np.random.random(resolution)) data_out = npw.empty_like(data_in) @@ -121,7 +121,7 @@ def test_copy3D(): # Build code prg = cl_env.build_src(src_copy, build_options, vec) - init_copy = KernelLauncher(prg.copy, cl_env.queue, gwi, lwi) + init_copy = OpenClKernelLauncher(prg.copy, cl_env.queue, gwi, lwi) data_in = npw.asrealarray(np.random.random(resolution)) data_out = npw.empty_like(data_in) @@ -161,7 +161,7 @@ def test_copy3D_rect(): int(resolution_x[2])) lwi = (4, 8, 1) prg = cl_env.build_src(src_copy, build_options, vec) - init_copy_x = KernelLauncher(prg.copy, cl_env.queue, gwi, lwi) + init_copy_x = OpenClKernelLauncher(prg.copy, cl_env.queue, gwi, lwi) build_options = "" build_options += " -D NB_I=32 -D NB_II=16 -D NB_III=64" @@ -172,7 +172,7 @@ def test_copy3D_rect(): int(resolution_x[2])) lwi = (4, 8, 1) prg = cl_env.build_src(src_copy, build_options, vec) - init_copy_y = KernelLauncher(prg.copy, cl_env.queue, gwi, lwi) + init_copy_y = OpenClKernelLauncher(prg.copy, cl_env.queue, gwi, lwi) build_options = "" build_options += " -D NB_I=64 -D NB_II=16 -D NB_III=32" @@ -183,7 +183,7 @@ def test_copy3D_rect(): int(resolution_x[1])) lwi = (4, 8, 1) prg = cl_env.build_src(src_copy, build_options, vec) - init_copy_z = KernelLauncher(prg.copy, cl_env.queue, gwi, lwi) + init_copy_z = OpenClKernelLauncher(prg.copy, cl_env.queue, gwi, lwi) data_in = npw.asrealarray(np.random.random(resolution_x)) data_out = np.empty_like(data_in) diff --git a/hysop/old/gpu.old/tests/test_opencl_environment.py b/hysop/old/gpu.old/tests/test_opencl_environment.py index 9e7732fc1..70b893758 100644 --- a/hysop/old/gpu.old/tests/test_opencl_environment.py +++ b/hysop/old/gpu.old/tests/test_opencl_environment.py @@ -1,6 +1,6 @@ """Test hysop implementation of OpenCL basic functionnalities""" import numpy as np -from hysop.backend.opencl.tools import get_opencl_environment, explore, OpenCLEnvironment +from hysop.backend.opencl.tools import get_opencl_environment, explore, OpenClEnvironment from hysop.constants import HYSOP_REAL FLOAT_GPU = np.float32 from hysop.core.mpi import main_comm @@ -14,7 +14,7 @@ def test_opencl_env_default(): explore() # Create default opencl env. cl_env = get_opencl_environment() - assert isinstance(cl_env, OpenCLEnvironment) + assert isinstance(cl_env, OpenClEnvironment) assert cl_env.device is not None assert cl_env.ctx is not None assert cl_env.queue is not None @@ -35,7 +35,7 @@ def test_opencl_env(): cl_env = get_opencl_environment(platform_id=nb_platforms - 1, device_id=nb_devices - 1, precision=FLOAT_GPU, comm=comm) - assert isinstance(cl_env, OpenCLEnvironment) + assert isinstance(cl_env, OpenClEnvironment) assert cl_env.platform == plt assert cl_env.device == device assert cl_env.ctx is not None diff --git a/hysop/old/gpu.old/tests/test_transposition.py b/hysop/old/gpu.old/tests/test_transposition.py index f944d3c5e..11c14821c 100644 --- a/hysop/old/gpu.old/tests/test_transposition.py +++ b/hysop/old/gpu.old/tests/test_transposition.py @@ -5,7 +5,7 @@ Testing copy kernels. from hysop.backend.opencl import cl from hysop.constants import np from hysop.backend.opencl.tools import get_opencl_environment -from hysop.backend.opencl.gpu_kernel import KernelLauncher +from hysop.backend.opencl.opencl_kernel import OpenClKernelLauncher from hysop.tools.numpywrappers import npw @@ -65,7 +65,7 @@ def test_transposition_xy2D(): # Build code prg = cl_env.build_src(src_transpose_xy, build_options, vec) - init_transpose_xy = KernelLauncher( + init_transpose_xy = OpenClKernelLauncher( prg.transpose_xy, cl_env.queue, gwi, lwi) _comparison(resolution, resolution, init_transpose_xy, init_transpose_xy, @@ -87,7 +87,7 @@ def test_transposition_xy2D_noVec(): # Build code prg = cl_env.build_src(src_transpose_xy, build_options) - init_transpose_xy = KernelLauncher( + init_transpose_xy = OpenClKernelLauncher( prg.transpose_xy, cl_env.queue, gwi, lwi) _comparison(resolution, resolution, init_transpose_xy, init_transpose_xy, @@ -111,7 +111,7 @@ def test_transposition_xy2D_rect(): build_options += " -D NB_GROUPS_I=" + str((resolution[0] / 4) / lwi[0]) build_options += " -D NB_GROUPS_II=" + str((resolution[1] / 4) / lwi[1]) prg = cl_env.build_src(src_transpose_xy, build_options, vec) - init_transpose_xy_x = KernelLauncher(prg.transpose_xy, + init_transpose_xy_x = OpenClKernelLauncher(prg.transpose_xy, cl_env.queue, gwi, lwi) @@ -124,7 +124,7 @@ def test_transposition_xy2D_rect(): build_options += " -D NB_GROUPS_I=" + str((resolution[1] / 4) / lwi[0]) build_options += " -D NB_GROUPS_II=" + str((resolution[0] / 4) / lwi[1]) prg = cl_env.build_src(src_transpose_xy, build_options, vec) - init_transpose_xy_y = KernelLauncher(prg.transpose_xy, + init_transpose_xy_y = OpenClKernelLauncher(prg.transpose_xy, cl_env.queue, gwi, lwi) _comparison(resolution, resolutionT, @@ -149,7 +149,7 @@ def test_transposition_xy2D_noVec_rect(): build_options += " -D NB_GROUPS_I=" + str((resolution[0]) / lwi[0]) build_options += " -D NB_GROUPS_II=" + str((resolution[1] / 4) / lwi[1]) prg = cl_env.build_src(src_transpose_xy, build_options, vec) - init_transpose_xy_x = KernelLauncher(prg.transpose_xy, + init_transpose_xy_x = OpenClKernelLauncher(prg.transpose_xy, cl_env.queue, gwi, lwi) @@ -162,7 +162,7 @@ def test_transposition_xy2D_noVec_rect(): build_options += " -D NB_GROUPS_I=" + str((resolution[1]) / lwi[0]) build_options += " -D NB_GROUPS_II=" + str((resolution[0] / 4) / lwi[1]) prg = cl_env.build_src(src_transpose_xy, build_options, vec) - init_transpose_xy_y = KernelLauncher(prg.transpose_xy, + init_transpose_xy_y = OpenClKernelLauncher(prg.transpose_xy, cl_env.queue, gwi, lwi) _comparison(resolution, resolutionT, @@ -185,7 +185,7 @@ def test_transposition_xy3D(): build_options += " -D NB_GROUPS_I=" + str((resolution[0] / 2) / lwi[0]) build_options += " -D NB_GROUPS_II=" + str((resolution[1] / 2) / lwi[1]) prg = cl_env.build_src(src_transpose_xy, build_options, vec) - init_transpose_xy = KernelLauncher( + init_transpose_xy = OpenClKernelLauncher( prg.transpose_xy, cl_env.queue, gwi, lwi) _comparison(resolution, resolution, init_transpose_xy, init_transpose_xy, @@ -207,7 +207,7 @@ def test_transposition_xy3D_noVec(): build_options += " -D NB_GROUPS_I=" + str((resolution[0]) / lwi[0]) build_options += " -D NB_GROUPS_II=" + str((resolution[1] / 2) / lwi[1]) prg = cl_env.build_src(src_transpose_xy, build_options, vec) - init_transpose_xy = KernelLauncher( + init_transpose_xy = OpenClKernelLauncher( prg.transpose_xy, cl_env.queue, gwi, lwi) _comparison(resolution, resolution, init_transpose_xy, init_transpose_xy, @@ -232,7 +232,7 @@ def test_transposition_xy3D_rect(): build_options += " -D NB_GROUPS_I=" + str((resolution[0] / 2) / lwi[0]) build_options += " -D NB_GROUPS_II=" + str((resolution[1] / 2) / lwi[1]) prg = cl_env.build_src(src_transpose_xy, build_options, vec) - init_transpose_xy_x = KernelLauncher( + init_transpose_xy_x = OpenClKernelLauncher( prg.transpose_xy, cl_env.queue, gwi, lwi) build_options = "" @@ -245,7 +245,7 @@ def test_transposition_xy3D_rect(): build_options += " -D NB_GROUPS_I=" + str((resolution[1] / 2) / lwi[0]) build_options += " -D NB_GROUPS_II=" + str((resolution[0] / 2) / lwi[1]) prg = cl_env.build_src(src_transpose_xy, build_options, vec) - init_transpose_xy_y = KernelLauncher( + init_transpose_xy_y = OpenClKernelLauncher( prg.transpose_xy, cl_env.queue, gwi, lwi) _comparison(resolution, resolutionT, init_transpose_xy_x, init_transpose_xy_y, @@ -270,7 +270,7 @@ def test_transposition_xy3D_noVec_rect(): build_options += " -D NB_GROUPS_I=" + str((resolution[0]) / lwi[0]) build_options += " -D NB_GROUPS_II=" + str((resolution[1] / 2) / lwi[1]) prg = cl_env.build_src(src_transpose_xy, build_options, vec) - init_transpose_xy_x = KernelLauncher( + init_transpose_xy_x = OpenClKernelLauncher( prg.transpose_xy, cl_env.queue, gwi, lwi) build_options = "" @@ -283,7 +283,7 @@ def test_transposition_xy3D_noVec_rect(): build_options += " -D NB_GROUPS_I=" + str((resolution[1]) / lwi[0]) build_options += " -D NB_GROUPS_II=" + str((resolution[0] / 2) / lwi[1]) prg = cl_env.build_src(src_transpose_xy, build_options, vec) - init_transpose_xy_y = KernelLauncher( + init_transpose_xy_y = OpenClKernelLauncher( prg.transpose_xy, cl_env.queue, gwi, lwi) _comparison(resolution, resolutionT, init_transpose_xy_x, init_transpose_xy_y, @@ -307,7 +307,7 @@ def test_transposition_xz3D(): build_options += " -D NB_GROUPS_II=" + str((resolution[1] / 4) / lwi[1]) build_options += " -D NB_GROUPS_III=" + str((resolution[2] / 4) / lwi[2]) prg = cl_env.build_src(src_transpose_xz, build_options, vec) - init_transpose_xz = KernelLauncher( + init_transpose_xz = OpenClKernelLauncher( prg.transpose_xz, cl_env.queue, gwi, lwi) _comparison(resolution, resolution, init_transpose_xz, init_transpose_xz, @@ -331,7 +331,7 @@ def test_transposition_xz3D_noVec(): build_options += " -D NB_GROUPS_II=" + str((resolution[1] / 4) / lwi[1]) build_options += " -D NB_GROUPS_III=" + str((resolution[2] / 4) / lwi[2]) prg = cl_env.build_src(src_transpose_xz, build_options, vec) - init_transpose_xz = KernelLauncher( + init_transpose_xz = OpenClKernelLauncher( prg.transpose_xz, cl_env.queue, gwi, lwi) _comparison(resolution, resolution, init_transpose_xz, init_transpose_xz, @@ -358,7 +358,7 @@ def test_transposition_xz3D_rect(): build_options += " -D NB_GROUPS_II=" + str((resolution[1] / 4) / lwi[1]) build_options += " -D NB_GROUPS_III=" + str((resolution[2] / 4) / lwi[2]) prg = cl_env.build_src(src_transpose_xz, build_options, vec) - init_transpose_xz_x = KernelLauncher( + init_transpose_xz_x = OpenClKernelLauncher( prg.transpose_xz, cl_env.queue, gwi, lwi) build_options = "" @@ -373,7 +373,7 @@ def test_transposition_xz3D_rect(): build_options += " -D NB_GROUPS_II=" + str((resolution[1] / 4) / lwi[1]) build_options += " -D NB_GROUPS_III=" + str((resolution[0] / 4) / lwi[2]) prg = cl_env.build_src(src_transpose_xz, build_options, vec) - init_transpose_xz_z = KernelLauncher( + init_transpose_xz_z = OpenClKernelLauncher( prg.transpose_xz, cl_env.queue, gwi, lwi) _comparison(resolution, resolutionT, init_transpose_xz_x, init_transpose_xz_z, @@ -400,7 +400,7 @@ def test_transposition_xz3D_noVec_rect(): build_options += " -D NB_GROUPS_II=" + str((resolution[1] / 4) / lwi[1]) build_options += " -D NB_GROUPS_III=" + str((resolution[2] / 4) / lwi[2]) prg = cl_env.build_src(src_transpose_xz, build_options, vec) - init_transpose_xz_x = KernelLauncher( + init_transpose_xz_x = OpenClKernelLauncher( prg.transpose_xz, cl_env.queue, gwi, lwi) build_options = "" @@ -415,7 +415,7 @@ def test_transposition_xz3D_noVec_rect(): build_options += " -D NB_GROUPS_II=" + str((resolution[1] / 4) / lwi[1]) build_options += " -D NB_GROUPS_III=" + str((resolution[0] / 4) / lwi[2]) prg = cl_env.build_src(src_transpose_xz, build_options, vec) - init_transpose_xz_z = KernelLauncher( + init_transpose_xz_z = OpenClKernelLauncher( prg.transpose_xz, cl_env.queue, gwi, lwi) _comparison(resolution, resolutionT, init_transpose_xz_x, init_transpose_xz_z, @@ -439,7 +439,7 @@ def test_transposition_xz3Dslice(): build_options += " -D NB_GROUPS_III=" + str((resolution[2] / 4) / lwi[2]) prg = cl_env.build_src(src_transpose_xz, build_options, vec) - init_transpose_xz = KernelLauncher( + init_transpose_xz = OpenClKernelLauncher( prg.transpose_xz, cl_env.queue, gwi, lwi) _comparison(resolution, resolution, init_transpose_xz, init_transpose_xz, @@ -462,7 +462,7 @@ def test_transposition_xz3Dslice_noVec(): build_options += " -D NB_GROUPS_III=" + str((resolution[2] / 4) / lwi[2]) prg = cl_env.build_src(src_transpose_xz, build_options, vec) - init_transpose_xz = KernelLauncher( + init_transpose_xz = OpenClKernelLauncher( prg.transpose_xz, cl_env.queue, gwi, lwi) _comparison(resolution, resolution, init_transpose_xz, init_transpose_xz, @@ -488,7 +488,7 @@ def test_transposition_xz3Dslice_rect(): build_options += " -D NB_GROUPS_I=" + str((resolution[0] / 2) / lwi[0]) build_options += " -D NB_GROUPS_III=" + str((resolution[2] / 4) / lwi[2]) prg = cl_env.build_src(src_transpose_xz, build_options, vec) - init_transpose_xz_x = KernelLauncher( + init_transpose_xz_x = OpenClKernelLauncher( prg.transpose_xz, cl_env.queue, gwi, lwi) build_options = "" @@ -502,7 +502,7 @@ def test_transposition_xz3Dslice_rect(): build_options += " -D NB_GROUPS_I=" + str((resolution[2] / 2) / lwi[0]) build_options += " -D NB_GROUPS_III=" + str((resolution[0] / 4) / lwi[2]) prg = cl_env.build_src(src_transpose_xz, build_options, vec) - init_transpose_xz_z = KernelLauncher( + init_transpose_xz_z = OpenClKernelLauncher( prg.transpose_xz, cl_env.queue, gwi, lwi) _comparison(resolution, resolutionT, init_transpose_xz_x, init_transpose_xz_z, @@ -527,7 +527,7 @@ def test_transposition_xz3Dslice_noVec_rect(): build_options += " -D NB_GROUPS_I=" + str(resolution[0] / lwi[0]) build_options += " -D NB_GROUPS_III=" + str((resolution[2] / 4) / lwi[2]) prg = cl_env.build_src(src_transpose_xz, build_options, vec) - init_transpose_xz_x = KernelLauncher( + init_transpose_xz_x = OpenClKernelLauncher( prg.transpose_xz, cl_env.queue, gwi, lwi) build_options = "" @@ -541,7 +541,7 @@ def test_transposition_xz3Dslice_noVec_rect(): build_options += " -D NB_GROUPS_I=" + str(resolution[2] / lwi[0]) build_options += " -D NB_GROUPS_III=" + str((resolution[0] / 4) / lwi[2]) prg = cl_env.build_src(src_transpose_xz, build_options, vec) - init_transpose_xz_z = KernelLauncher( + init_transpose_xz_z = OpenClKernelLauncher( prg.transpose_xz, cl_env.queue, gwi, lwi) _comparison(resolution, resolutionT, init_transpose_xz_x, init_transpose_xz_z, diff --git a/hysop/old/gpu.old/tools.py b/hysop/old/gpu.old/tools.py index eac2194b2..014681b31 100644 --- a/hysop/old/gpu.old/tools.py +++ b/hysop/old/gpu.old/tools.py @@ -1,7 +1,7 @@ """Classes and tools used to handle openCL interface. -* :class:`~hysop.gpu.tools.OpenCLEnvironment`: +* :class:`~hysop.gpu.tools.OpenClEnvironment`: object handling opencl platform, device ... info. * :func:`~hysop.gpu.tools.get_opengl_shared_environment`: build or get an OpenCL environment with openGL properties. @@ -43,7 +43,7 @@ class KernelError(Exception): return self.err + ': ' + self.msg -class OpenCLEnvironment(object): +class OpenClEnvironment(object): """OpenCL environment informations and useful functions. """ @@ -103,7 +103,7 @@ class OpenCLEnvironment(object): key=main_comm.Get_rank()) # Floating point codegeneration mode - from hysop.backend.codegen.base.types import OpenClTypeGen + from hysop.backend.opencl.opencl_types import OpenClTypeGen _kargs = {'device':self.device, 'context':self.ctx, 'platform':self.platform } if __KERNEL_DEBUG__: _kargs['float_dump_mode'] = 'dec' @@ -196,7 +196,7 @@ class OpenCLEnvironment(object): ## update opencl typegen # Floating point codegeneration mode - from hysop.backend.codegen.base.types import OpenClTypeGen + from hysop.backend.opencl.opencl_types import OpenClTypeGen _kargs = {'device':self.device, 'context':self.ctx, 'platform':self.platform } if __KERNEL_DEBUG__: _kargs['float_dump_mode'] = 'dec' @@ -910,7 +910,7 @@ def get_opengl_shared_environment(platform_id=None, Returns ------- - :class:`~hysop.gpu.tools.OpenCLEnvironment` + :class:`~hysop.gpu.tools.OpenClEnvironment` object handling OpenCL platform, device, context and queue The context is obtained with gl-shared properties depending on the OS. @@ -921,7 +921,7 @@ def get_opengl_shared_environment(platform_id=None, device_id = __DEFAULT_DEVICE_ID__ global __cl_env if __cl_env is None: - __cl_env = OpenCLEnvironment(platform_id, device_id, device_type, + __cl_env = OpenClEnvironment(platform_id, device_id, device_type, precision, gl_sharing=True, comm=comm) else: __cl_env.modify(platform_id, device_id, device_type, @@ -953,7 +953,7 @@ def get_opencl_environment(platform_id=None, Returns ------- - :class:`~hysop.gpu.tools.OpenCLEnvironment` + :class:`~hysop.gpu.tools.OpenClEnvironment` object handling OpenCL platform, device, context and queue """ @@ -963,7 +963,7 @@ def get_opencl_environment(platform_id=None, device_id = __DEFAULT_DEVICE_ID__ global __cl_env if __cl_env is None: - __cl_env = OpenCLEnvironment(platform_id, device_id, device_type, + __cl_env = OpenClEnvironment(platform_id, device_id, device_type, precision, comm=comm) else: __cl_env.modify(platform_id, device_id, device_type, diff --git a/hysop/old/gpu.old/visu/marchingcube.py b/hysop/old/gpu.old/visu/marchingcube.py index 4434c774f..691292979 100644 --- a/hysop/old/gpu.old/visu/marchingcube.py +++ b/hysop/old/gpu.old/visu/marchingcube.py @@ -82,7 +82,7 @@ class Marching_Cube(object): self._cl_env.macros['**HP_SIZE**'] = self._size_ self.prg = self._cl_env.build_src(self.usr_src, options) kernel_name = 'constructHPLevel' + self.field.name.split('_D')[0] - self.numMethod = KernelLauncher(eval('self.prg.' + kernel_name), + self.numMethod = OpenClKernelLauncher(eval('self.prg.' + kernel_name), self.queue, self.gwi, self.lwi) diff --git a/hysop/tools/sympy_utils.py b/hysop/tools/sympy_utils.py index 5ea732a0c..f005e9a52 100644 --- a/hysop/tools/sympy_utils.py +++ b/hysop/tools/sympy_utils.py @@ -1,5 +1,5 @@ -from hysop.deps import np, sm +from hysop.deps import np, sm, copy # unicode subscripts for decimal numbers, signs and parenthesis dec = ['\u208{}'.format(i).decode('unicode-escape') for i in xrange(10)] @@ -179,3 +179,14 @@ def recurse_expression_tree(op, expr): if isinstance(expr, sm.Expr): for arg in expr.args: recurse_expression_tree(op, arg) + +def expr2str(expr, svars, dumper=str): + svars = dict(zip(svars.keys(), [dumper(v) for v in svars.values()])) + expr = copy.deepcopy(expr) + def op(expr): + if isinstance(expr,sm.Symbol) and (expr in svars): + expr.name = svars[expr] + print expr.__class__ + recurse_expression_tree(op,expr) + expr = remove_pows(expr) + return str(expr) -- GitLab