diff --git a/hysop/backend/device/codegen/kernels/tests/test_directional_remesh.py b/hysop/backend/device/codegen/kernels/tests/test_directional_remesh.py index 2faae784ffd21637c55eae63bd0101b09c751071..cb907c1e738d063c39edc5132ddea484666cc934 100644 --- a/hysop/backend/device/codegen/kernels/tests/test_directional_remesh.py +++ b/hysop/backend/device/codegen/kernels/tests/test_directional_remesh.py @@ -4,7 +4,7 @@ from hysop.testsenv import __HAS_OPENCL_BACKEND__, iter_clenv, opencl_failed from hysop.deps import np, copy, math from hysop.backend.device.opencl import cl from hysop.tools.types import check_instance -from hysop.constants import BoundaryCondition +from hysop.constants import BoundaryCondition, Precision from hysop.backend.device.codegen.base.test import _test_mesh_info , _test_typegen from hysop.numerics.odesolvers.runge_kutta import ExplicitRungeKutta from hysop.backend.device.codegen.kernels.directional_remesh import DirectionalRemeshKernel @@ -31,8 +31,8 @@ class TestDirectionalRemesh(object): dx = A['dx'][0][0] inv_dx = A['inv_dx'][0][0] - umax = +100.0 - umin = -100.0 + umax = +10.0 + umin = -10.0 dt = cfl * dx/max(abs(umax),abs(umin)) assert(umin<umax) @@ -61,23 +61,25 @@ class TestDirectionalRemesh(object): compute_grid_bytes = compute_grid_size.size * typegen.FLT_BYTES[typegen.fbtype] - mf = cl.mem_flags - ctx = cl_env.context + mf = cl.mem_flags + ctx = cl_env.context + device = cl_env.device + platform = cl_env.platform host_buffers_init = { 'no_ghosts': { 'u': (umax-umin) * np.random.rand(*compute_grid_shape).astype(dtype) + umin, 'pos': -1 * np.ones(shape=compute_grid_shape, dtype=dtype), - 'S0': -1 * np.ones(shape=compute_grid_shape, dtype=dtype), - 'S1': -1 * np.ones(shape=compute_grid_shape, dtype=dtype), + 'S0': -5 * np.ones(shape=compute_grid_shape, dtype=dtype), + 'S1': +5 * np.ones(shape=compute_grid_shape, dtype=dtype), 'dbg0': -1 * np.ones(shape=compute_grid_shape, dtype=np.int32), 'dbg1': -1 * np.ones(shape=compute_grid_shape, dtype=np.int32), }, 'with_ghosts': { 'u': (umax-umin) * np.random.rand(*compute_grid_shape).astype(dtype) + umin, 'pos': -1 * np.ones(shape=compute_grid_shape, dtype=dtype), - 'S0': -1 * np.ones(shape=S0_grid_shape, dtype=dtype), - 'S1': -1 * np.ones(shape=S1_grid_shape, dtype=dtype), + 'S0': +5 * np.ones(shape=S0_grid_shape, dtype=dtype), + 'S1': -5 * np.ones(shape=S1_grid_shape, dtype=dtype), 'dbg0': -1 * np.ones(shape=compute_grid_shape, dtype=np.int32), 'dbg1': -1 * np.ones(shape=compute_grid_shape, dtype=np.int32), } @@ -113,6 +115,9 @@ class TestDirectionalRemesh(object): host_buffers_reference = copy.deepcopy(host_buffers_init) host_buffers_gpu = copy.deepcopy(host_buffers_init) + Lx = min(typegen.device.max_work_item_sizes[0], typegen.device.max_work_group_size) + local_work_size = np.asarray([Lx,1,1]) + work_load = np.asarray([1,1,1]) self.dx = dx self.inv_dx = inv_dx @@ -124,32 +129,60 @@ class TestDirectionalRemesh(object): self.cl_env = cl_env self.typegen = cl_env.typegen self.queue = cl_env.default_queue - self.context = cl_env.context - self.device = cl_env.device + self.context = ctx + self.device = device self.compute_grid_size = compute_grid_size self.compute_grid_shape = compute_grid_shape self.compute_mesh_info = compute_mesh_info self.compute_grid_bytes = compute_grid_bytes - self.S0_ghosts = S0_ghosts - self.S0_grid_size = S0_grid_size - self.S0_grid_shape = S0_grid_shape - self.S0_mesh_info = S0_mesh_info + self.S0_ghosts = S0_ghosts + self.S0_grid_ghosts = S0_grid_ghosts + self.S0_grid_size = S0_grid_size + self.S0_grid_shape = S0_grid_shape + self.S0_mesh_info = S0_mesh_info - self.S1_ghosts = S1_ghosts - self.S1_grid_size = S1_grid_size - self.S1_grid_shape = S1_grid_shape - self.S1_mesh_info = S1_mesh_info + self.S1_ghosts = S1_ghosts + self.S1_grid_ghosts = S1_grid_ghosts + self.S1_grid_size = S1_grid_size + self.S1_grid_shape = S1_grid_shape + self.S1_mesh_info = S1_mesh_info self.host_buffers_init = host_buffers_init self.host_buffers_reference = host_buffers_reference self.host_buffers_gpu = host_buffers_gpu self.device_buffers = device_buffers - Lx = min(typegen.device.max_work_item_sizes[0], typegen.device.max_work_group_size) - self.local_work_size = np.asarray([Lx,1,1]) - self.work_load = np.asarray([1,1,1]) + self.local_work_size = local_work_size + self.work_load = work_load + + print \ +''' +== Directional Remesh Test == + platform: {} + device: {} + dtype: {} + + compute_grid_size: {} + dx: {} + inv_dx: {} + S0_grid_ghosts: {} + S1_grid_ghosts: {} + + umin: {} + umax: {} + cfl: {} + dt: {} + + local_work_size: {} + work_load: {} +======= end of setup ======== +'''.format(platform.name, device.name, dtype.__name__, + compute_grid_size, dx, inv_dx, + S0_grid_ghosts, S1_grid_ghosts, + umin, umax, cfl, dt, + local_work_size, work_load) @classmethod @@ -163,6 +196,7 @@ class TestDirectionalRemesh(object): pass def _do_advec_on_cpu(self, rk_scheme, boundary): + print '_do_advec_on_cpu({}, {})'.format(rk_scheme.name(), boundary) dt = self.dt dx = self.dx inv_dx = self.inv_dx @@ -238,6 +272,92 @@ class TestDirectionalRemesh(object): host_buffers_reference['pos'] = pos + def _do_remesh_on_cpu(self, rk_scheme, boundary, kernel): + print '_do_remesh_on_cpu({}, {}, {})'.format(rk_scheme, boundary, kernel) + dt = self.dt + dx = self.dx + inv_dx = self.inv_dx + compute_grid_size = self.compute_grid_size + + S0_ghosts=self.S0_grid_ghosts + S1_ghosts=self.S1_grid_ghosts + + is_periodic = False + if boundary == BoundaryCondition.PERIODIC: + is_periodic = True + target = 'no_ghosts' + view = [slice(0,compute_grid_size[2]), + slice(0,compute_grid_size[1]), + slice(0,compute_grid_size[0])] + S0_view = view + S1_view = view + elif boundary == BoundaryCondition.NONE: + target = 'with_ghosts' + S0_view = [slice(S0_ghosts[2],compute_grid_size[2]+S0_ghosts[2]), + slice(S0_ghosts[1],compute_grid_size[1]+S0_ghosts[1]), + slice(S0_ghosts[0],compute_grid_size[0]+S0_ghosts[0])] + S1_view = [slice(S1_ghosts[2],compute_grid_size[2]+S1_ghosts[2]), + slice(S1_ghosts[1],compute_grid_size[1]+S1_ghosts[1]), + slice(S1_ghosts[0],compute_grid_size[0]+S1_ghosts[0])] + else: + msg='Unknown boundary.' + raise ValueError(msg) + + host_init_buffers = self.host_buffers_init[target] + host_buffers_reference = self.host_buffers_reference[target] + + pos = host_buffers_reference['pos'] + S0_in = host_init_buffers['S0'] + S1_in = host_init_buffers['S1'] + S0_out = np.zeros_like(S0_in) + S1_out = np.zeros_like(S1_in) + + dx, inv_dx = self.dx, self.inv_dx + umin, umax, dt = self.umin, self.umax, dt + + min_pos = 0 + pos -= min_pos + + ind = np.floor(pos*inv_dx) + y = (pos - ind*dx)*inv_dx + assert (y>=0).all() and (y<1.0).all() + + min_ind, max_ind = np.min(ind), np.max(ind) + theorical_min_ind = np.floor(umin*dt*inv_dx) + theorical_max_ind = np.floor(compute_grid_size[0]+umax*dt*inv_dx-1) + + msg0='min_ind={} <= floor(umin*dt*inv_dx)={}'.format(min_ind, theorical_min_ind) + if min_ind < theorical_min_ind: + raise RuntimeError(msg0) + else: + print msg0 + + msg1='max_ind={} <= floor(umax*dt*inv_dx)={}'.format(max_ind, theorical_max_ind) + if max_ind > theorical_max_ind: + raise RuntimeError(msg1) + else: + print msg1 + + if is_periodic: + ind = (ind+compute_grid_size[0])%compute_grid_size[0] + + assert kernel.n % 2 == 0 + S0_out[...] = 0 + S1_out[...] = 0 + for i in xrange(-kernel.n, +kernel.n): + if is_periodic: + _ind = (ind+i+compute_grid_size[0]) % compute_grid_size[0] + else: + _ind = ind+i + yi = y+i + wi = kernel.gamma(yi) + S0_out[:,:,_ind] += wi*S0_in[:,:,ind] + S1_out[:,:,_ind] += wi*S1_in[:,:,ind] + + + host_buffers_reference['S0'][...] = S0_out + host_buffers_reference['S1'][...] = S1_out + def _do_compute_gpu_and_check(self, rk_scheme, boundary, cached, nparticles, work_dim): msg = '\nTesting {}directional {}d remesh with {} scheme and {} boundaries, {} particles at a time.'\ @@ -424,26 +544,27 @@ class TestDirectionalRemesh(object): nscalars=[1,2] if self.enable_extra_tests: - kernel=[RemeshKernel(2,2), RemeshKernel(4,4), RemeshKernel(8,4)] + kernels=[RemeshKernel(2,1), RemeshKernel(2,2), RemeshKernel(4,4)] nparticles=[1,2,4,8,16] work_dims=[1,2,3] is_inplaces=[False, True] use_atomics=[False, True] remesh_criterias=[None,100] else: - kernel=[RemeshKernel(2,2)] + kernels=[RemeshKernel(2,2)] nparticles=[1,2] work_dims=[2] is_inplaces=[True] use_atomics=[True] remesh_criterias=[None] - for cl_env in iter_clenv(): + for cl_env in iter_clenv(precision=Precision.FLOAT): self._initialize(cl_env=cl_env, cfl=cfl) for boundary in boundaries: + print self._do_advec_on_cpu(boundary=boundary, rk_scheme=rk_scheme) - # for kernel in kernels: - # self._do_remesh_on_cpu(boundary=boundary, rk_scheme=rk_scheme, kernel=kernel) + for kernel in kernels: + self._do_remesh_on_cpu(boundary=boundary, rk_scheme=rk_scheme, kernel=kernel) # for work_dim in work_dims: # for is_inplace in is_inplaces: # for use_atomic in use_atomics: diff --git a/hysop/backend/device/opencl/opencl_tools.py b/hysop/backend/device/opencl/opencl_tools.py index 2640d83455c71460656f39faefe4e766c85be89a..2e2c5d909501e005183771b739685ceea5ab3a55 100644 --- a/hysop/backend/device/opencl/opencl_tools.py +++ b/hysop/backend/device/opencl/opencl_tools.py @@ -142,7 +142,8 @@ def get_or_create_opencl_env(comm=None, device_id = __DEFAULT_DEVICE_ID__, device_type = DeviceType.ALL, precision = Precision.DEFAULT, - gl_sharing=False): + gl_sharing=False, + **kargs): """ Create or an OpenClEnvironment from given parameters if it does not already exists. All environements are kept alive (cached) in a dictionary local to this @@ -165,7 +166,7 @@ def get_or_create_opencl_env(comm=None, from hysop.backend.device.opencl.opencl_env import OpenClEnvironment env = OpenClEnvironment(platform_id=platform_id, device_id=device_id, - device_type=device_type, gl_sharing=gl_sharing, comm=comm) + device_type=device_type, gl_sharing=gl_sharing, comm=comm, **kargs) opencl_envs[key] = env diff --git a/hysop/numerics/odesolvers/runge_kutta.py b/hysop/numerics/odesolvers/runge_kutta.py index 04b76d67495fa774e50f6f775ccc07e83deaeac4..b5a302537df7e684080cc6f5d2c16f428d40d603 100644 --- a/hysop/numerics/odesolvers/runge_kutta.py +++ b/hysop/numerics/odesolvers/runge_kutta.py @@ -9,7 +9,8 @@ def I_dump(i): return '{}'.format(z) class TimeIntegrator(object): - pass + def __str__(self): + return self.name() class RungeKutta(TimeIntegrator): pass diff --git a/hysop/numerics/remesh/kernel_generator.py b/hysop/numerics/remesh/kernel_generator.py index a1f49375d5b28d1ef320f8c6a4f509b794abba55..870e0a1746e2b4a2fe62b2158e1e0671c5de1099 100644 --- a/hysop/numerics/remesh/kernel_generator.py +++ b/hysop/numerics/remesh/kernel_generator.py @@ -38,7 +38,7 @@ class Kernel(object): Use SymmetricKernelGenerator or KernelGenerator to generate a kernel. Do not call directly. - split_polys: Split each polynomial in two, to gain + split_polys: Split each polynomial in two, to gain precision. """ dic = kargs @@ -60,6 +60,7 @@ class Kernel(object): Ms = self.Ms deg = self.deg P = self.P + self.Px = P if verbose: print ' => Substitution in Polynomials' @@ -84,6 +85,10 @@ class Kernel(object): Cr[:,i] = np.asarray(Pit_r.all_coeffs(), dtype=float) Pt_l.append(Pit_l) Pt_r.append(Pit_r) + self.Cl = Cl + self.Cr = Cr + self.Pt_l = Pt_l + self.Pt_r = Pt_r if verbose: print ' => Splitting polynomials' @@ -101,9 +106,13 @@ class Kernel(object): print ' ',sm.horner(p) else: C = np.empty(shape=(deg+1,2*Ms),dtype=float) + Pt = [] for i,Pix in enumerate(P): Pit = Pix.xreplace({x:t+X[i]}) C[:,i] = np.asarray(Pit.all_coeffs(), dtype=float) + Pt.append(Pit) + self.Pt = Pt + self.C = C if verbose: print ' => Generating lambdas' diff --git a/hysop/numerics/remesh/remesh.py b/hysop/numerics/remesh/remesh.py index 96154f590154f9fac1ec8c31c7eec7177534e278..837799e3c40732394b588bbb8bca650b8f087294 100644 --- a/hysop/numerics/remesh/remesh.py +++ b/hysop/numerics/remesh/remesh.py @@ -24,6 +24,9 @@ class RemeshKernel(Kernel): super(RemeshKernel,self).__init__(**kargs) + def __str__(self): + return 'RemeshKernel(n={}, r={}, split={})'.format(self.n, self.r, self.poly_splitted) + if __name__=='__main__': import numpy as np diff --git a/hysop/testsenv.py b/hysop/testsenv.py index 64eebbf5292d36a19db462e0780c070fb6bf917b..4c6e146e265c4f9633106206d5b01313d9c87dfb 100644 --- a/hysop/testsenv.py +++ b/hysop/testsenv.py @@ -38,14 +38,14 @@ if __HAS_OPENCL_BACKEND__: """ return f - def iter_clenv(): + def iter_clenv(**kargs): """Iterate over all platforms and device and yield OpenClEnvironments. If __ENABLE_LONG_TESTS__ is False, just yield default clenv.""" if __ENABLE_LONG_TESTS__: envs = [] for i,plat in enumerate(cl.get_platforms()): for j,dev in enumerate(plat.get_devices()): - env = get_or_create_opencl_env(i,j) + env = get_or_create_opencl_env(platform_id=i, device_id=j, **kargs) assert env not in envs envs.append(env) yield env