diff --git a/CMakeLists.txt b/CMakeLists.txt index 273806eb0678b2c88d2268d321068e29433ae0e3..095c983cc1b98f676a263d91d501464cefdafaff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -142,7 +142,7 @@ find_python_module(gmpy2 REQUIRED) find_python_module(subprocess32 REQUIRED) find_python_module(editdistance REQUIRED) find_python_module(portalocker REQUIRED) -find_python_module(colors.py REQUIRED) +find_python_module(colors REQUIRED) # color.py package find_python_module(tee REQUIRED) find_python_module(primefac REQUIRED) find_python_module(graph_tool REQUIRED) diff --git a/ci/docker_images/ubuntu/bionic/Dockerfile b/ci/docker_images/ubuntu/bionic/Dockerfile index 6284b2a57bf41209663b8496e350e02c203d4f16..575d705383d3c09d5fa9cc9a74f9b9e07b1aa6b6 100644 --- a/ci/docker_images/ubuntu/bionic/Dockerfile +++ b/ci/docker_images/ubuntu/bionic/Dockerfile @@ -146,8 +146,21 @@ RUN cd /tmp \ && cd - \ && rm -Rf /tmp/Oclgrind +# clpeak +RUN cd /tmp \ + && git clone https://github.com/krrishnarraj/clpeak \ + && cd clpeak/ \ + && mkdir build \ + && cd build/ \ + && cmake .. \ + && make \ + && mv clpeak /usr/local/bin/ \ + && cd - \ + && rm -Rf /tmp/clpeak + # clFFT RUN cd /tmp \ + && ln -s /usr/local/lib /usr/local/lib64 \ && git clone https://github.com/clMathLibraries/clFFT \ && cd clFFT \ && cd src \ @@ -180,7 +193,9 @@ RUN cd /tmp \ && make install \ && cd - \ && rm -Rf /tmp/graph-tool-2.26 - + +# ensure all libraries are known by the runtime linker +RUN ldconfig # clean cached packages RUN rm -rf /var/lib/apt/lists/* diff --git a/examples/example_utils.py b/examples/example_utils.py index b1b143088d62b3d2bf5b2815141b8bb834bd7f75..230667fff4deb041aabdb720ffe684a7acaf5d44 100644 --- a/examples/example_utils.py +++ b/examples/example_utils.py @@ -174,9 +174,9 @@ class HysopArgParser(argparse.ArgumentParser): with self.redirect_stdout(rank, args):#, self.redirect_stderr(rank, args): from hysop.tools.contexts import printoptions - with printoptions(threshold=10000, linewidth=240, + with printoptions(threshold=10000, linewidth=2000, nanstr='nan', infstr='inf', - formatter={'float': lambda x: '{:>6.2f}'.format(x)}): + formatter={'float': lambda x: '{:>8.4f}'.format(x)}): program(args, **kwds) @staticmethod diff --git a/examples/shear_layer/shear_layer.py b/examples/shear_layer/shear_layer.py index 8693accf62e1b8ebc4f22d08469ede4d22069367..b463e5e8ab82885989b2a599419f1f1105df399c 100644 --- a/examples/shear_layer/shear_layer.py +++ b/examples/shear_layer/shear_layer.py @@ -161,15 +161,15 @@ def compute(args): dfields = problem.input_discrete_fields dfields[vorti].initialize(formula=init_vorticity) - from hysop.tools.debug_utils import ImshowDebugger - dbg = ImshowDebugger(data={'Wz':(dfields[vorti],0)}, - ntimes=1, - enable_on_op_apply=False) - dbg.synchronize_queue(cl_env.default_queue) - dbg('initialization') + #from hysop.tools.debug_utils import ImshowDebugger + #dbg = ImshowDebugger(data={'Wz':(dfields[vorti],0)}, + #ntimes=1, + #enable_on_op_apply=False) + #dbg.synchronize_queue(cl_env.default_queue) + #dbg('initialization') # Finally solve the problem - problem.solve(simu, dry_run=args.dry_run, dbg=dbg) + problem.solve(simu, dry_run=args.dry_run) # Finalize problem.finalize() diff --git a/hysop/backend/device/codegen/functions/directional_remesh.py b/hysop/backend/device/codegen/functions/directional_remesh.py index 5c4d042a485d70b2ad149065f8951f2307d1a1aa..87f00dbe22f09bbc9ce00dc05b2b4b080113f8cf 100644 --- a/hysop/backend/device/codegen/functions/directional_remesh.py +++ b/hysop/backend/device/codegen/functions/directional_remesh.py @@ -26,6 +26,9 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): use_short_circuit=None, known_args=None, debug_mode=False): + + + debug_mode = True check_instance(sboundary,tuple,values=BoundaryCondition) check_instance(remesh_kernel, RemeshKernel) @@ -216,11 +219,11 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): printf_no_vector=True if printf_no_vector: - vnf = '[{}]'.format(', '.join('%2.2f' for _ in xrange(nparticles))) + vnf = '[{}]'.format(', '.join('%2.4f' for _ in xrange(nparticles))) vni = '[{}]'.format(', '.join('%i' for _ in xrange(nparticles))) expand_printf_vector = lambda x: ','.join(x[i] for i in xrange(nparticles)) else: - vnf = '[%2.2{}f]'.format(self._printv(nparticles)) + vnf = '[%2.4{}f]'.format(self._printv(nparticles)) vni = '[%{}i]'.format(self._printv(nparticles)) expand_printf_vector = lambda x: str(x) epv = expand_printf_vector @@ -264,7 +267,8 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): eps.declare(s) s.jumpline() - #s.decl_aligned_vars(tst0, tst1) + if debug_mode: + s.decl_aligned_vars(tst0, tst1) s.decl_aligned_vars(rp, find, ind, y) vone.declare(s, const=True) s.decl_vars(*weights) @@ -274,13 +278,14 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): s.append('{} = {}*{};'.format(rp, pos, inv_dx)) s.append('{} = fract({}, &{});'.format(y, rp, find)) s.append('{} = convert_{}_rtn({});'.format(ind, ivtype, find)) - #s.append('{} = floor({rp});'.format(tst0, rp=rp)) - #s.append('{} = {rp} - floor({rp});'.format(tst1, rp=rp)) if debug_mode: - with self._ordered_wi_execution_(barrier=False): - code = 'printf("%lu p={vnf}, rp={vnf}, floor(rp)={vnf}, rp-floor(rp)={vnf}, ind={vni}, y={vnf}, s0={vnf}, a=%f, b=%f, c=%f.\\n", {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});'.format( + s.append('{} = floor({rp});'.format(tst0, rp=rp)) + s.append('{} = {rp} - floor({rp});'.format(tst1, rp=rp)) + if debug_mode: + #with self._ordered_wi_execution_(barrier=False): + with self._if_('(get_group_id(1)==0) && (get_group_id(1)==0) && get_local_id(0)==35'): + code = 'printf("%lu p={vnf}, p.invdx={vnf}, floor(p/dx)={vnf}, p/dx-floor(p/dx)={vnf}, ind={vni}, y={vnf}, s0={vnf}.\\n", {}, {}, {}, {}, {}, {}, {}, {});'.format( 'get_local_id(0)', epv(pos), epv(rp), epv(tst0), epv(tst1), epv(ind), epv(y), epv(scalars[0]), - 'floor(4.5f)', 'floor((float2)(1.5f,2.54f)).x', 'floor((float2)(1.5f,2.54f)).y', vnf=vnf, vni=vni) s.append(code) y.affect(s, init='{}-{}'.format(vone,y)) @@ -332,13 +337,14 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): val='{}*{}'.format(w,scalars[0]) cache_idx='{}+{}'.format(cache_ghosts, ind[0]) - if poly_splitted: - printf = 'printf("BATCH {}: %lu wrote {vnf} at idx={vni} with Wl={vnf}, Wr={vnf}, W={vnf}, cond={vni}.\\n",{},{},{},{},{},{},{});'.format( - i, 'get_local_id(0)', val, ind, wl, wr, w, 'y<0.5f', vnf=vnf, vni=vni) - else: - printf = 'printf("BATCH {}: %lu wrote {vnf} at idx {vni} with W={vnf}.\\n",{},{},{},{});'.format( - i, 'get_local_id(0)', val, ind, w, vni=vni, vnf=vnf) - s.append(printf) + with self._if_('(get_group_id(1)==0) && (get_group_id(1)==0) && get_local_id(0)==35'): + if poly_splitted: + printf = 'printf("BATCH {}: %lu wrote {vnf} at idx={vni} with Wl={vnf}, Wr={vnf}, W={vnf}, cond={vni}.\\n",{},{},{},{},{},{},{});'.format( + i, 'get_local_id(0)', val, ind, wl, wr, w, 'y<0.5f', vnf=vnf, vni=vni) + else: + printf = 'printf("BATCH {}: %lu wrote {vnf} at idx {vni} with W={vnf}.\\n",{},{},{},{});'.format( + i, 'get_local_id(0)', val, ind, w, vni=vni, vnf=vnf) + s.append(printf) if not use_atomics: s.barrier(_local=True) if use_atomics: diff --git a/hysop/backend/device/codegen/kernels/directional_remesh.py b/hysop/backend/device/codegen/kernels/directional_remesh.py index 1aa60d7498c0410ba3c59f76e63c8db34843e7a4..63a4d1f6bd565b6098168131e21a149b5f4672aa 100644 --- a/hysop/backend/device/codegen/kernels/directional_remesh.py +++ b/hysop/backend/device/codegen/kernels/directional_remesh.py @@ -717,7 +717,7 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): init=tg.dump(+0.0)) with s._else_(): end_id='{}+{}'.format(local_work, 'l') - if debug_mode: + if debug_mode or True: s.append('printf("%i loaded back %2.2f from position %i.\\n", {}, {}, {});'.format(local_id[0], cached_scalars[0][0][end_id], end_id)) with s._align_() as al: for csi in cached_scalars: @@ -725,6 +725,7 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): csij.affect(al, align=True, i='l', init=csij[end_id]) + s.barrier(_local=True) s.jumpline() diff --git a/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py b/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py index 52bc6db03550148556395f674b216dbbdb9d4675..8c5da4faf68293ebb7f0524862b39166de715784 100644 --- a/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py +++ b/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py @@ -269,8 +269,8 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): precision = extra_kwds['precision'] force_atomics = extra_kwds['force_atomics'] - nparticles_options = [1,2,4,8,16] - use_atomics_options = [True] if force_atomics else [True, False] + nparticles_options = [1,]#2,4,8,16] + use_atomics_options = [True,]# if force_atomics else [True, False] if True in use_atomics_options: cl_env = self.cl_env msg=None @@ -326,9 +326,9 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): cache_ghosts = extra_kwds['min_s_ghosts'] min_wg_size = npw.ones(shape=work_bounds.work_dim, dtype=npw.int32) - min_wg_size[0] = max(min_wg_size[0], 2*cache_ghosts) + min_wg_size[0] = 36 max_wg_size = npw.ones(shape=work_bounds.work_dim, dtype=npw.int32) - max_wg_size[0] = max(global_work_size[0], min_wg_size[0]) + max_wg_size[0] = 36 return (min_wg_size, max_wg_size) def compute_global_work_size(self, local_work_size, work, extra_parameters, extra_kwds): diff --git a/hysop/backend/device/opencl/operator/directional/advection_dir.py b/hysop/backend/device/opencl/operator/directional/advection_dir.py index f93666b08a51588279cb5af887aacae111a92a2c..c9352d1afc697fc24b6674dff838adde7c6650a5 100644 --- a/hysop/backend/device/opencl/operator/directional/advection_dir.py +++ b/hysop/backend/device/opencl/operator/directional/advection_dir.py @@ -13,7 +13,7 @@ from hysop.backend.device.opencl.opencl_kernel_launcher import OpenClKernelListL class OpenClDirectionalAdvection(DirectionalAdvectionBase, OpenClDirectionalOperator): DEBUG=False - GRAPHICAL_DEBUG=False + GRAPHICAL_DEBUG=True @debug def __init__(self, force_atomics=False, relax_min_particles=False, remesh_criteria_eps=None, @@ -190,8 +190,11 @@ class OpenClDirectionalAdvection(DirectionalAdvectionBase, OpenClDirectionalOper elif self.GRAPHICAL_DEBUG: import numpy as np p = self.dposition + v = self.dvelocity sin = self.dadvected_fields_in.values()[0] sout = self.dadvected_fields_out.values()[0] + + V = v[0].get()[v.compute_slices] self.advection_kernel_launcher(queue=queue, dt=dt).wait() @@ -206,21 +209,27 @@ class OpenClDirectionalAdvection(DirectionalAdvectionBase, OpenClDirectionalOper S0 = np.sum(Sin, axis=1) S1 = np.sum(Sout, axis=1) - failed = np.where((S1-S0)>1e-1)[0] + failed = np.where((S1-S0)>1)[0] if failed.size: - for i in failed: + for i in failed[:1]: print 'AT I={}'.format(i) - slc = (i,slice(250,260)) + slc = (i,slice(None)) + print 'Vx' + print V[slc] print 'P' - print P[slc]*512 + print P[slc]*P.shape[1] print 'SIN' print Sin[slc] print 'SOUT' print Sout[slc] + print 'INDEXES' + print np.arange(Sout[slc].size).astype(np.float32) print 'SCALAR WAS NOT CONSERVED' print ' Sin: {}'.format(S0[i]/S0.size) print ' Sout: {}'.format(S1[i]/S1.size) print + print 'FAILED INDICES' + print failed import sys sys.exit(1) else: