diff --git a/hysop/__init__.py b/hysop/__init__.py index 70947c5c950830d633be359024d4b34eb3e29ab1..1382493cb73a43792564a9e8e5cb607d52a42240 100644 --- a/hysop/__init__.py +++ b/hysop/__init__.py @@ -25,7 +25,7 @@ __PROFILE__ = "OFF" in ["0", "1"] __ENABLE_LONG_TESTS__ = "OFF" is "ON" # OpenCL -__DEFAULT_PLATFORM_ID__ = 0 +__DEFAULT_PLATFORM_ID__ = 1 __DEFAULT_DEVICE_ID__ = 0 if __MPI_ENABLED__: diff --git a/hysop/backend/device/codegen/functions/directional_remesh.py b/hysop/backend/device/codegen/functions/directional_remesh.py index bf0614441a0db5de609e518f1ccb0505490879b1..051fdd6207050abd4720128dcbdb0ebbd41b291d 100644 --- a/hysop/backend/device/codegen/functions/directional_remesh.py +++ b/hysop/backend/device/codegen/functions/directional_remesh.py @@ -324,7 +324,9 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): printf = 'printf("BATCH {}: %lu wrote {vnf} at idx {vni} with W={vnf}.\\n",{},{},{},{});'.format( i, 'get_local_id(0)', val, ind, w, vni=vni, vnf=vnf) s.append(printf) - + if not use_atomics: + s.barrier(_local=True) + if use_atomics: s.barrier(_local=True) # def per_work_statistics(self): diff --git a/hysop/backend/device/codegen/kernels/directional_remesh.py b/hysop/backend/device/codegen/kernels/directional_remesh.py index eb3a81d2f2e737992b0d92d09e93cf21c02423aa..def55d1f765a53fa3777dff64a5b66e24566fb87 100644 --- a/hysop/backend/device/codegen/kernels/directional_remesh.py +++ b/hysop/backend/device/codegen/kernels/directional_remesh.py @@ -539,26 +539,29 @@ class DirectionalRemeshKernel(KernelCodeGenerator): # when compute_grid_size is not a multiple of nparticles, # we need to go back to scalars load/store for the last particles. def if_last_thread_active(cond=None): - cond = active if (cond is None) else cond + cond = last_active if (cond is None) else cond with s._if_(cond): with s._align_() as al: for j in xrange(nparticles): cond = '({}+{}$ < {})'.format(particle_offset, j, compute_grid_size[0]) yield al, cond, j - if is_periodic: - def if_first_or_last_thread_active(): - #cond = '{} || ({} && {} && ({}<{}))'.format(last_active, first, active, local_id[0], nparticles) - cond = None - return if_last_thread_active(cond) - else: - if_first_or_last_thread_active = if_last_thread_active - @contextlib.contextmanager - def if_thread_active(): - with s._elif_('{}'.format(active)): + def if_thread_active(not_first=False): + if not_first: + cond = '(!{}) && {}'.format(first, active) + else: + cond = active + with s._elif_(cond): yield + def if_first_or_last_thread_active(): + if is_periodic and is_inplace and nparticles==1: + cond= '{} || ({} && ({}>=2*{}))'.format(last_active, first, local_id[0], cache_ghosts) + else: + cond= '{} || {}'.format(last_active, first) + return if_last_thread_active(cond) + with s._kernel_(): s.jumpline() @@ -603,24 +606,19 @@ class DirectionalRemeshKernel(KernelCodeGenerator): kmax.declare(s) last_particle.declare(s) with contextlib.nested(*nested_loops): - s.barrier(_local=True, _global=True) s.comment('Compute global offsets and line pointers') with s._align_() as al: position_global_id.affect(al, i=0, align=True, init='{} + {}'.format(particle_offset, position_grid_ghosts[0])) - if is_inplace: - for _gid, _ghosts in zip(scalars_in_global_id, scalars_in_grid_ghosts): - _gid.affect(al, i=0, align=True, - init='{} + {} - {}'.format(particle_offset, _ghosts[0], cache_ghosts)) - else: + if not is_inplace: for _gid, _ghosts in zip(scalars_in_global_id, scalars_in_grid_ghosts): _gid.affect(al, i=0, align=True, init='{} + {}'.format(particle_offset, _ghosts[0])) - for _gid, _ghosts in zip(scalars_out_global_id, scalars_out_grid_ghosts): - _gid.affect(al, i=0, align=True, - init='{} + {} - {}'.format(particle_offset, _ghosts[0], - cache_ghosts if not is_periodic else 0)) + for _gid, _ghosts in zip(scalars_out_global_id, scalars_out_grid_ghosts): + _gid.affect(al, i=0, align=True, + init='{} + {} - {}'.format(particle_offset, _ghosts[0], + cache_ghosts if not is_periodic else 0)) s.jumpline() s.decl_aligned_vars(*line_vars) @@ -643,7 +641,6 @@ class DirectionalRemeshKernel(KernelCodeGenerator): cs.affect(al, align=True, i='{}'.format(local_id[0]), init=cs[end_id]) - s.barrier(_local=True, _global=True) s.jumpline() @@ -659,7 +656,6 @@ class DirectionalRemeshKernel(KernelCodeGenerator): for cs in cached_scalars: offset = '{}+2*{}'.format(local_offset, cache_ghosts) s.append(s.vstore(nparticles, cs, offset, vzero)) - s.barrier(_local=True, _global=True) s.jumpline() s.comment('Load position and scalars at current index, {} particles at a time.'.format(nparticles)) @@ -667,7 +663,7 @@ class DirectionalRemeshKernel(KernelCodeGenerator): load = self.vload(1, line_position, '{}+{}'.format(particle_offset,j), align=True) code = '{} $= ({} $? {} $: {});'.format(pos[j], active_cond, load, tg.dump(0.0)) al.append(code) - if is_inplace: + if is_inplace and not is_periodic: _id = '{}+{}+{}'.format(particle_offset, cache_ghosts, j) else: _id = '{}+{}'.format(particle_offset, j) @@ -680,14 +676,14 @@ class DirectionalRemeshKernel(KernelCodeGenerator): pos.affect(al, align=True, init=self.vload(nparticles, line_position, particle_offset, align=True)) - if is_inplace: + if is_inplace and not is_periodic: _id = '{}+{}'.format(particle_offset, cache_ghosts) else: _id = particle_offset for si, line_sin in zip(scalars, line_scalars_in): si.affect(al, align=True, init=self.vload(nparticles, line_sin, _id,align=True)) - s.barrier(_local=True, _global=True) + s.barrier(_local=True) s.jumpline() @@ -706,7 +702,6 @@ class DirectionalRemeshKernel(KernelCodeGenerator): remesh_kargs['grid_size'] = compute_grid_size[0] call = remesh(**remesh_kargs) s.append('{};'.format(call)) - s.barrier(_local=True, _global=True) s.jumpline() @@ -719,15 +714,20 @@ class DirectionalRemeshKernel(KernelCodeGenerator): else: _id = '{}+{}'.format(particle_offset, j) store = self.vstore(1, line_sout, _id, load, align=True, jmp=True, suppress_semicolon=True) - code = '{} $&& ({});'.format(active_cond, store) + code = '{} $&& ({})'.format(active_cond, store) + if debug_mode: + code += ' && (printf("last_active %i wrote %2.2f from cache position %i to global id %i.\\n", {},{},{},{}));'.format( + local_id[0], load, '{}+{}'.format(local_offset,j), _id) + else: + code += ';' al.append(code) - with if_thread_active(): + with if_thread_active(not_first=True): with s._align_() as al: if debug_mode: s.append('printf("%i wrote {vnf} from cache position %i to global id %i.\\n", {},{},{},{});'.format( local_id[0], self.vload(nparticles, cached_scalars[0], local_offset), - local_offset, particle_offset, + local_offset, particle_offset if not is_periodic else '({}+{}-{})%{}'.format(particle_offset, compute_grid_size[0], cache_ghosts, compute_grid_size[0]), vnf='[%2.2v{}f]'.format(nparticles) if nparticles>1 else '%2.2f')) for sc, line_sout in zip(cached_scalars, line_scalars_out): load = self.vload(nparticles, sc, '{}'.format(local_offset)) @@ -737,10 +737,6 @@ class DirectionalRemeshKernel(KernelCodeGenerator): _id = particle_offset store = self.vstore(nparticles, line_sout, _id, load, align=True, jmp=True) al.append(store) - s.barrier(_local=True, _global=True) - #if debug_mode: - #dbg0.affect(i='position_GID', codegen=s, init=1) - #dbg1.affect(i='position_GID', codegen=s, init=2) if is_periodic: s.jumpline() @@ -750,7 +746,6 @@ class DirectionalRemeshKernel(KernelCodeGenerator): for sc, bsc in zip(cached_scalars, boundary_scalars): load = sc[local_id[0]] store = bsc.affect(al, align=True, i=local_id[0], init=load) - s.barrier(_local=True, _global=True) s.jumpline() with s._if_('{} && ({} < 2*{})'.format(last, local_id[0], cache_ghosts)): @@ -764,8 +759,9 @@ class DirectionalRemeshKernel(KernelCodeGenerator): code = '{} $= {}+{};'.format(line_sout[_gid], val0, val1) al.append(code) if debug_mode: - s.append('printf("%i initiated last write from cache position %i+%i=%i to global id %i+%i-%i+%i=%i.\\n", {}, {},{},{}, {},{},{},{},{});'.format( + s.append('printf("%i initiated last write of value %2.2f + %2.2f = %2.2f from cache position %i+%i=%i to global id %i+%i-%i+%i=%i.\\n", {}, {},{},{}, {},{},{}, {},{},{},{},{});'.format( local_id[0], + val0, val1, '{}+{}'.format(val0,val1), last_particle, local_id[0], '{}+{}'.format(last_particle, local_id[0]), line_offset, last_particle, cache_ghosts, local_id[0], _gid)) @@ -782,7 +778,7 @@ class DirectionalRemeshKernel(KernelCodeGenerator): '{}+{}'.format(last_particle, local_id[0]), line_offset, last_particle, local_id[0], '{}+{}+{}'.format(line_offset, last_particle, local_id[0]))) - s.barrier(_local=True, _global=True) + s.barrier(_local=True) if __name__ == '__main__': diff --git a/hysop/backend/device/codegen/kernels/tests/test_directional_remesh.py b/hysop/backend/device/codegen/kernels/tests/test_directional_remesh.py index b40622a016041dec568093b2cabb30052edc7274..f151f06d4e9e54b601e8a2de1878f09d62e3e608 100644 --- a/hysop/backend/device/codegen/kernels/tests/test_directional_remesh.py +++ b/hysop/backend/device/codegen/kernels/tests/test_directional_remesh.py @@ -13,7 +13,7 @@ from hysop.tools.contexts import printoptions class TestDirectionalRemesh(object): - DEBUG = True + DEBUG = False @classmethod def setup_class(cls, @@ -34,11 +34,11 @@ class TestDirectionalRemesh(object): def _initialize(self, cl_env, cfl): typegen = cl_env.typegen - dtype = cl_env.precision - eps = np.finfo(dtype).eps + dtype = cl_env.precision + eps = np.finfo(dtype).eps # compute grid - compute_grid_size = np.asarray([10,2,1]) + compute_grid_size = np.asarray([97,7,11]) compute_grid_shape = compute_grid_size[::-1] (A,compute_grid_mesh_info) = _test_mesh_info('base_mesh_info',typegen,3,0,compute_grid_size) @@ -124,7 +124,7 @@ class TestDirectionalRemesh(object): ## allocate and initialize buffers random = self.random - random.seed(43) + random.seed(42) uinit = (umax-umin) * random.rand(*compute_grid_shape).astype(dtype) + umin # force worst case on boundaries @@ -213,7 +213,7 @@ class TestDirectionalRemesh(object): Lx = min(typegen.device.max_work_item_sizes[0], typegen.device.max_work_group_size) Lx = min(Lx, 1 << int(np.floor(np.log2(compute_grid_size[0])))) local_work_size = np.asarray([Lx,1,1]) - work_load = np.asarray([1,1,1]) + work_load = np.asarray([1,3,2]) self.dx = dx self.inv_dx = inv_dx @@ -748,12 +748,12 @@ class TestDirectionalRemesh(object): local_work_size = local_work_size[:work_dim] views = { - 'u': view, - 'pos': view, - 'S0_in': S0_in_view, - 'S1_in': S1_in_view, - 'S0_out': S0_out_view, - 'S1_out': S1_out_view, + 'u': view, + 'pos': view, + 'S0_in': S0_in_view, + 'S1_in': S1_in_view, + 'S0_out': S0_out_view, + 'S1_out': S1_out_view, } known_vars = { @@ -782,7 +782,7 @@ class TestDirectionalRemesh(object): debug_mode=self.DEBUG, known_vars=known_vars) - #drk.edit() + # drk.edit() global_work_size = drk.get_global_size(work_size, local_work_size, work_load) (static_shared_bytes, dynamic_shared_bytes, total_sharedbytes) = \ @@ -1038,8 +1038,8 @@ class TestDirectionalRemesh(object): assert cfl>0 check_instance(rk_scheme,ExplicitRungeKutta) - boundaries=[BoundaryCondition.PERIODIC] - nscalars=[1] + nscalars=[1,2] + boundaries=[BoundaryCondition.NONE, BoundaryCondition.PERIODIC] if self.enable_extra_tests: kernels=[(2,1), (2,2), (4,2), (4,4), (6,4), (6,6), (8,4)] @@ -1050,13 +1050,13 @@ class TestDirectionalRemesh(object): is_inplaces=[False, True] remesh_criterias=[None, 1000000] else: - kernels=[(6,4)] - nparticles=[2] - work_dims=[2] - split_polys=[False] - use_atomics=[True] - is_inplaces=[False] - remesh_criterias=[None] + kernels=[(2,1), (2,2), (4,2), (4,4), (6,4), (6,6), (8,4)] + nparticles=[1,2,4,8,16] + work_dims=[1,2,3] + split_polys=[False, True] + use_atomics=[False, True] + is_inplaces=[False, True] + remesh_criterias=[None, 1000000] ntests = 0 for cl_env in iter_clenv(precision=Precision.FLOAT): @@ -1088,14 +1088,14 @@ class TestDirectionalRemesh(object): @opencl_failed - def test_remesh_from_Euler_advection(self, cfl=1.0): + def test_remesh_from_Euler_advection_low_cfl(self, cfl=0.5788): rk_scheme=ExplicitRungeKutta('Euler') self._check_kernels(rk_scheme=rk_scheme, cfl=cfl) - #@opencl_failed - #def test_remesh_from_RK4_advection(self, cfl=0.5): - #rk_scheme=ExplicitRungeKutta('RK4') - #self._check_kernels(rk_scheme=rk_scheme, cfl=cfl) + @opencl_failed + def test_remesh_from_Euler_advection_high_cfl(self, cfl=3.78): + rk_scheme=ExplicitRungeKutta('Euler') + self._check_kernels(rk_scheme=rk_scheme, cfl=cfl) if __name__ == '__main__': if not __HAS_OPENCL_BACKEND__: @@ -1108,7 +1108,8 @@ if __name__ == '__main__': with printoptions(linewidth=200, formatter={'float':lambda x: '{:0.2f}'.format(x)}): - test.test_remesh_from_Euler_advection() + test.test_remesh_from_Euler_advection_low_cfl() + test.test_remesh_from_Euler_advection_high_cfl() TestDirectionalRemesh.teardown_class()