diff --git a/CMakeLists.txt b/CMakeLists.txt index 04b08f506b621747f83bd4f18387f5f516dd1a1f..7203581db508b915940a0b2f4c467785ba75d215 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -197,6 +197,10 @@ if(WITH_FFTW) add_definitions(${FFTW_DEFINES}) endif() +if(WITH_LIB_CXX) + compile_with(Boost REQUIRED) +endif() + if(WITH_EXTRAS) # Arnoldi solver needs zgeev, which means lapack compile_with(LAPACK) @@ -317,7 +321,7 @@ endif() if(WITH_LIB_CXX) #C++ variables used by setup.py.in for swig - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wall -Wextra -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter ${FFTW_COMPILE_FLAGS} -fPIC -std=c++11") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wall -Wextra -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter -Wno-unused-local-typedefs ${FFTW_COMPILE_FLAGS} -fPIC -std=c++11") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") @@ -325,11 +329,14 @@ if(WITH_LIB_CXX) set(CXX_FLAGS "${CMAKE_CXX_FLAGS}") set(CXX_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") - set(CXX_EXT_INCLUDES ${PYTHON_INCLUDE_DIR} ${FFTW_INCLUDE_DIRS}) - set(CXX_EXT_LIBS ${PYTHON_LIBRARIES} ${FFTW_LIBRARIES}) - set(CXX_EXT_LIB_DIRS ${FFTW_LIBRARY_DIRS}) + set(CXX_EXT_INCLUDES ${Boost_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR} ${FFTW_INCLUDE_DIRS}) + set(CXX_EXT_LIBS ${PYTHON_LIBRARIES} ${FFTW_LIBRARIES} ${Boost_LIBRARIES}) + set(CXX_EXT_LIB_DIRS ${FFTW_LIBRARY_DIRS} ${Boost_LIBRARY_DIRS}) set(CXX_EXTRA_DEFINES ${FFTW_DEFINES} -DHAS_EXTERN_TEMPLATES) + set(CMAKE_INCLUDE_SYSTEM_FLAG_C "-isystem ") + set(CMAKE_INCLUDE_SYSTEM_FLAG_CXX "-isystem ") + #swig package name (lib name generated by swig) set(CPP_2_HYSOP "cpp2hysop") endif() diff --git a/hysop/codegen/kernels/stretching.py b/hysop/codegen/kernels/stretching.py index 128777239d66329f3b3815310452d6785a08872b..72ed5136420629c4b121bd27dff8f98df0c8d0c2 100644 --- a/hysop/codegen/kernels/stretching.py +++ b/hysop/codegen/kernels/stretching.py @@ -19,9 +19,10 @@ class CachedStretchingKernel(KernelCodeGenerator): def codegen_name(ftype,work_dim): return 'cached_stretching_{}_{}d'.format(ftype,work_dim) - def __init__(self, typegen, dim, order=2, + def __init__(self, typegen, dim, + device,context, + order=2, ftype=None, - device=None,context=None, known_vars = None, symbolic_mode=True): @@ -29,7 +30,8 @@ class CachedStretchingKernel(KernelCodeGenerator): ftype = ftype if ftype is not None else typegen.fbtype work_dim=3 - kernel_reqs = self.build_requirements(typegen,work_dim, order, cached) + kernel_reqs = self.build_requirements(typegen=typegen,device=device,context=context, + work_dim=work_dim, order=order, cached=cached) kernel_args = self.gen_kernel_arguments(typegen, work_dim, ftype, kernel_reqs) name = CachedStretchingKernel.codegen_name(ftype, dim) @@ -55,16 +57,16 @@ class CachedStretchingKernel(KernelCodeGenerator): return reduce(operator.mul, local_size+order, 1)*self.typegen.FLT_BYTES[self.ftype] - def build_requirements(self,typegen,work_dim,order,cached): + def build_requirements(self,typegen,device,context,work_dim,order,cached): reqs = WriteOnceDict() compute_id = ComputeIndexFunction(typegen=typegen, dim=work_dim, itype='int', wrap=False) reqs['compute_id'] = compute_id - mesh_base_struct = MeshBaseStruct(typegen=typegen, typedef='MeshBaseStruct_s') + mesh_base_struct = MeshBaseStruct(device=device,context=context,typegen=typegen, typedef='MeshBaseStruct_s') reqs['MeshBaseStruct'] = mesh_base_struct - mesh_info_struct = MeshInfoStruct(typegen=typegen, typedef='MeshInfoStruct_s') + mesh_info_struct = MeshInfoStruct(device=device, context=context, typegen=typegen, typedef='MeshInfoStruct_s') reqs['MeshInfoStruct'] = mesh_info_struct gradient = GradientFunction(typegen=typegen, dim=work_dim, order=order, @@ -163,11 +165,22 @@ class CachedStretchingKernel(KernelCodeGenerator): if __name__ == '__main__': - - tg = OpenClTypeGen('float', 'dec') - ek = CachedStretchingKernel(typegen=tg, order=16, dim=1 ,ftype=tg.fbtype, - known_vars=dict(local_size=(1024,1,1))) - ek.edit() - ek.test_compile() - #print ek - print + + import pyopencl as cl + + devices = [] + contexts = {} + for plat in cl.get_platforms(): + devices += plat.get_devices() + for dev in devices: + ctx = cl.Context([dev]) + contexts[dev] = ctx + + tg = OpenClTypeGen('float', 'dec') + for dev,ctx in contexts.iteritems(): + ek = CachedStretchingKernel(typegen=tg, context=ctx, device=dev, + order=16, dim=1 ,ftype=tg.fbtype, + known_vars=dict(local_size=(1024,1,1))) + ek.edit() + ek.test_compile() + break diff --git a/hysop/constants.py b/hysop/constants.py index 5894a663591087c1a71e00828d9ccc7103acc216..3f9323cdccf16d799ecf9efb70974b5df9d9fddd 100755 --- a/hysop/constants.py +++ b/hysop/constants.py @@ -111,10 +111,10 @@ def debugdecorator(f): debug = debugdecorator # redefine profile decorator -if __PROFILE__: - from memory_profiler import profile - prof = profile -else: - def prof(f): - # Nothing ... - return f +# if __PROFILE__: + # from memory_profiler import profile + # prof = profile +# else: +def prof(f): + # Nothing ... + return f diff --git a/hysop/gpu/static_gpu_particle_advection_dir.py b/hysop/gpu/static_gpu_particle_advection_dir.py index d14e76332eb0064d65da3ecbce0abdbecb7dc665..33e3508f88cfd9d1daae19883a3cf83fac46ceec 100644 --- a/hysop/gpu/static_gpu_particle_advection_dir.py +++ b/hysop/gpu/static_gpu_particle_advection_dir.py @@ -44,6 +44,9 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir): 'transpose_xz':[], 'transpose_zx':[], 'stretching':[], + 'advec':[], + 'remesh':[], + 'advec_remesh':[] } # Additional method and configuration checks @@ -418,7 +421,7 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir): mesh_info = self._fields_mesh_info_var dt=0.1 - nruns=16 + nruns=4 force_renew_cache=True (kernel_launcher, kernel_args, kernel_args_mapping, cached_bytes) = \ @@ -490,6 +493,7 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir): ) evt = self._advec_and_remesh[nbc](*args, wait_for=wait_evts) + self.bench['advec_remesh'].append(evt) fg.events.append(evt) velocity.events.append(evt) @@ -506,6 +510,7 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir): self._cl_mesh_info ]) advec_evt = self._advec(*args,wait_for=velocity.events) + self.bench['advec'].append(advec_evt) velocity.events.append(advec_evt) for (fg,fp) in self.fields_on_part.iteritems(): @@ -518,6 +523,7 @@ class StaticGPUParticleAdvectionDir(GPUParticleAdvectionDir): + [self._cl_mesh_info] ) remesh_evt = self._remesh[nbc](*args, wait_for=[advec_evt]) + self.bench['remesh'].append(remesh_evt) fg.events.append(remesh_evt) if self._has_stretching: @@ -608,7 +614,7 @@ if __name__=='__main__': dim = 3 GHOSTS = 0 NSCALARS = 0 - f_resolution = (65,65,65)[:dim] + f_resolution = (513,513,129)[:dim] v_resolution = f_resolution #v_resolution = (33,33,33)[:dim] ghosts = (GHOSTS,)*dim @@ -750,4 +756,15 @@ if __name__=='__main__': simu.advance() i+=1 + + from hysop.gpu.kernel_autotuner import OpenClKernelStatistics + bench = A._advec_dir[0].bench + for i in xrange(1,dim): + for k,v in A._advec_dir[i].bench.iteritems(): + bench[k] += v + + for name,evts in bench.iteritems(): + if len(evts)>=1: + print name, OpenClKernelStatistics(events=evts,nruns=i) +