diff --git a/Examples/levelSet2D_OpenCL-CPU.py b/Examples/levelSet2D_OpenCL-CPU.py new file mode 100644 index 0000000000000000000000000000000000000000..90829ce90c8d67bfb20299e2daf23804fe3183b2 --- /dev/null +++ b/Examples/levelSet2D_OpenCL-CPU.py @@ -0,0 +1,111 @@ +import parmepy +#parmepy.__VERBOSE__ = True +from parmepy.domain.box import Box +# import parmepy.gpu +# parmepy.gpu.CL_PROFILE = True +from parmepy.fields.continuous import Field +from parmepy.operator.advection import Advection +from parmepy.problem.transport import TransportProblem +from parmepy.operator.monitors.printer import Printer +from parmepy.problem.simulation import Simulation +from parmepy.operator.analytic import Analytic +from parmepy.operator.redistribute import Redistribute +from parmepy.methods_keys import TimeIntegrator, Interpolation, Remesh,\ + Support, Splitting, MultiScale +#from parmepy.numerics.integrators.runge_kutta2 import RK2 as RK +from parmepy.numerics.integrators.runge_kutta4 import RK4 as RK +from parmepy.numerics.interpolation import Linear +from parmepy.numerics.remeshing import L6_6 as rmsh +from parmepy.mpi.main_var import main_size +from parmepy.constants import np, HDF5 + + +def vitesse(res, x, y, t=0.): + res[0][...] = -np.sin(x * np.pi) ** 2 * np.sin(y * np.pi * 2) * \ + np.cos(t * np.pi / 3.) + res[1][...] = np.sin(y * np.pi) ** 2 * np.sin(x * np.pi * 2) * \ + np.cos(t * np.pi / 3.) + return res + + +def scalaire(res, x, y, t=0.): + rr = np.sqrt((x - 0.5) ** 2 + (y - 0.75) ** 2) + res[0][...] = 0. + res[0][rr < 0.15] = 1. + return res + + +dim = 2 +boxLength = [1., 1.] +boxMin = [0., 0.] +nbElem = [513, ] * 2 + +timeStep = 0.025 +finalTime = 3. 
+outputFilePrefix = 'levelSet_2D_' +outputModulo = 10 +simu = Simulation(tinit=0.0, tend=finalTime, timeStep=timeStep, iterMax=120) + +## Domain +box = Box(dim, length=boxLength, origin=boxMin) + +## Fields +scal = Field(domain=box, name='Scalar', formula=scalaire + ) +velo = Field(domain=box, name='Velocity', isVector=True, formula=vitesse + ) + +## Operators +# By default, with a 'gpu' support, operator is going to use the default device +# given at cmake. To specify another device, user must set the proper +# parameters: platform_id and device_id. +# parameter device_type can be used to get a specific device type +advec = Advection(velo, scal, + resolutions={velo: nbElem, + scal: nbElem}, + method={TimeIntegrator: RK, + Interpolation: Linear, + Remesh: rmsh, + Support: 'gpu_1k', + Splitting: 'o2'}, + # platform_id=0, + # device_id=0, + # device_type='cpu' + ) +advec.discretize() +velocity = Analytic(velo, + resolutions={velo: nbElem}, + topo=advec.advecDir[0].discreteFields[velo].topology + ) + +if main_size > 1: + distrForAdvecY = Redistribute([velo], velocity, advec.advecDir[1], + component=1) + +##Problem +# Sequential : no need of redistribute +if main_size == 1: + pb = TransportProblem([velocity, advec], simu, dumpFreq=-1) +else: + pb = TransportProblem([velocity, distrForAdvecY, advec], + simu, dumpFreq=-1) + +## Setting solver to Problem +pb.setUp() + +print scal.topoInit +p = Printer(variables=[scal], + topo=scal.topoInit, + frequency=outputModulo, + prefix=outputFilePrefix, + formattype=HDF5) +pb.addMonitors([p]) +p.apply(simu) + +## Solve problem +pb.solve() + +p.apply(simu) + +pb.finalize() +print pb.timer diff --git a/HySoP/CMakeLists.txt b/HySoP/CMakeLists.txt index 7abc196ebfb24e1821ae5e2eea1aa2e428251cf3..54e746b43189c96f11406b42422fb3344312ff36 100644 --- a/HySoP/CMakeLists.txt +++ b/HySoP/CMakeLists.txt @@ -142,10 +142,28 @@ execute_process( string(STRIP ${${PROJECT_NAME}_PYTHON_BUILD_DIR} ${PROJECT_NAME}_PYTHON_BUILD_DIR) # --- OpenCL --- 
+find_python_module(pyopencl REQUIRED) + +find_python_module(sympy REQUIRED) if(WITH_GPU) - find_python_module(pyopencl REQUIRED) - find_python_module(sympy REQUIRED) +execute_process( + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/opencl_explore.py "EXPLORE") +execute_process( + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/opencl_explore.py + OUTPUT_VARIABLE OPENCL_DEFAULT_OPENCL_ID) +else() +execute_process( + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/opencl_explore.py "EXPLORE" CPU) +execute_process( + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/opencl_explore.py CPU + OUTPUT_VARIABLE OPENCL_DEFAULT_OPENCL_ID) endif() +string(REPLACE " " ";" MY_LIST ${OPENCL_DEFAULT_OPENCL_ID}) +list(GET MY_LIST 0 OPENCL_DEFAULT_OPENCL_PLATFORM_ID) +list(GET MY_LIST 1 OPENCL_DEFAULT_OPENCL_DEVICE_ID) +display(OPENCL_DEFAULT_OPENCL_PLATFORM_ID) +display(OPENCL_DEFAULT_OPENCL_DEVICE_ID) + # --- MPI --- diff --git a/HySoP/ParmesToSinglePrecision.patch b/HySoP/ParmesToSinglePrecision.patch index d3a9075d4532aa71feac0d1dfd29dc96a0a9ae98..95a983bcafac9aa730639c5cbe2fc309017c6a2b 100644 --- a/HySoP/ParmesToSinglePrecision.patch +++ b/HySoP/ParmesToSinglePrecision.patch @@ -51,7 +51,7 @@ index 8645a9f..718eb19 100755 start = MPI_WTime() call r2c_3d(omega_x,omega_y,omega_z, ghosts_vort) diff --git parmepy/f2py/scales2py.f90 parmepy/f2py/scales2py.f90 -index cca8fea..0abc5cd 100755 +index ab5b440..d56112e 100755 --- parmepy/f2py/scales2py.f90 +++ parmepy/f2py/scales2py.f90 @@ -6,7 +6,7 @@ use advec, only : advec_init,advec_step,advec_step_Inter_basic,advec_step_Inter_ @@ -63,7 +63,7 @@ index cca8fea..0abc5cd 100755 implicit none -@@ -92,7 +92,7 @@ contains +@@ -93,7 +93,7 @@ contains real(pk), dimension(size(vx,1),size(vx,2),size(vx,3)), intent(inout) :: scal !f2py real(pk) intent(in,out), depend(size(vx,1)) :: scal @@ -76,7 +76,7 @@ diff --git parmepy/operator/tests/test_velocity_correction.py parmepy/operator/t old mode 100755 new mode 100644 diff --git setup.py.in 
setup.py.in -index 3c75c97..4a8c17a 100644 +index f771350..22fd086 100644 --- setup.py.in +++ setup.py.in @@ -71,8 +71,8 @@ if(enable_fortran is "ON"): @@ -88,8 +88,8 @@ index 3c75c97..4a8c17a 100644 + parmeslib.append('fftw3f') + parmeslib.append('fftw3f_mpi') parmes_libdir.append(fftwdir) - - withscales = '@WITH_SCALES@' + else: + packages.append('parmepy.fakef2py') diff --git src/client_data.f90 src/client_data.f90 index 46b5268..77178d9 100755 --- src/client_data.f90 diff --git a/HySoP/hysop/__init__.py.in b/HySoP/hysop/__init__.py.in index 23eec22a216b5de1d527d76d1d208a2815935fd3..444a178b0cbc0a54892fe0b38b5437fe4c042ea0 100755 --- a/HySoP/hysop/__init__.py.in +++ b/HySoP/hysop/__init__.py.in @@ -27,6 +27,10 @@ if __MPI_ENABLED__: else: print "Starting @PACKAGE_NAME@ (no mpi) version " + str(__version__) + ".\n" +# OpenCL +__DEFAULT_PLATFORM_ID__ = @OPENCL_DEFAULT_OPENCL_PLATFORM_ID@ +__DEFAULT_DEVICE_ID__ = @OPENCL_DEFAULT_OPENCL_DEVICE_ID@ + version = "1.0.0" ## Box-type physical domain diff --git a/HySoP/hysop/gpu/gpu_particle_advection.py b/HySoP/hysop/gpu/gpu_particle_advection.py index 14e27284deedcf8bc046d07abbaa2334f83f0418..57566923be2ab39208cea1b09e91a939a41d4534 100644 --- a/HySoP/hysop/gpu/gpu_particle_advection.py +++ b/HySoP/hysop/gpu/gpu_particle_advection.py @@ -32,8 +32,8 @@ class GPUParticleAdvection(ParticleAdvection): @abstractmethod def __init__(self, velocity, advectedFields, d, part_position=None, part_advectedFields=None, - platform_id=0, device_id=0, - device_type='gpu', + platform_id=None, device_id=None, + device_type=None, method=None, src=None, precision=PARMES_REAL, batch_nb=None, isMultiScale=False): @@ -73,8 +73,8 @@ class GPUParticleAdvection(ParticleAdvection): self.num_method = None self.dim = self.advectedFields[0].dimension self.cl_env = get_opencl_environment( - platform_id, device_id, - device_type, precision, + platform_id=platform_id, device_id=device_id, + device_type=device_type, precision=precision, 
comm=self.advectedFields[0].topology.comm) self._main_rank = self.advectedFields[0].topology.comm.Get_rank() self._main_size = self.advectedFields[0].topology.comm.Get_size() diff --git a/HySoP/hysop/gpu/gpu_particle_advection_1k.py b/HySoP/hysop/gpu/gpu_particle_advection_1k.py index 113701c20c20d7c24c8502eb9f1a0fd4a6e6536a..55c8e1f6d8d8584e59408fcc9aebe5b67481b156 100644 --- a/HySoP/hysop/gpu/gpu_particle_advection_1k.py +++ b/HySoP/hysop/gpu/gpu_particle_advection_1k.py @@ -24,8 +24,8 @@ class GPUParticleAdvection1k(GPUParticleAdvection): @debug def __init__(self, velocity, advectedFields, d, part_position=None, part_advectedFields=None, - platform_id=0, device_id=0, - device_type='gpu', + platform_id=None, device_id=None, + device_type=None, method=None, src=None, precision=PARMES_REAL, batch_nb=None, isMultiScale=False): diff --git a/HySoP/hysop/gpu/gpu_particle_advection_2k.py b/HySoP/hysop/gpu/gpu_particle_advection_2k.py index ade1d26d081ce1afd7eff59798c011ba52e2ebf6..571a50eb3528cd85904c556bed6c58347b20f957 100644 --- a/HySoP/hysop/gpu/gpu_particle_advection_2k.py +++ b/HySoP/hysop/gpu/gpu_particle_advection_2k.py @@ -23,8 +23,8 @@ class GPUParticleAdvection2k(GPUParticleAdvection): @debug def __init__(self, velocity, advectedFields, d, part_position=None, part_advectedFields=None, - platform_id=0, device_id=0, - device_type='gpu', + platform_id=None, device_id=None, + device_type=None, method=None, src=None, precision=PARMES_REAL, batch_nb=None, isMultiScale=False): diff --git a/HySoP/hysop/gpu/tests/test_copy.py b/HySoP/hysop/gpu/tests/test_copy.py index 087cf91556960c313aa856b5edf17f2579afd4cf..e275f44f372bf45bc2f52535805c147b400f92de 100644 --- a/HySoP/hysop/gpu/tests/test_copy.py +++ b/HySoP/hysop/gpu/tests/test_copy.py @@ -7,12 +7,10 @@ from parmepy.constants import ORDER, np, PARMES_REAL from parmepy.gpu.tools import get_opencl_environment from parmepy.gpu.gpu_kernel import KernelLauncher -DEVICE_NUMBER = 1 - def test_copy2D(): resolution = (256, 
256) - cl_env = get_opencl_environment(0, DEVICE_NUMBER, 'gpu', PARMES_REAL) + cl_env = get_opencl_environment() vec = 2 src_copy = 'kernels/copy.cl' build_options = "" @@ -50,7 +48,7 @@ def test_copy2D(): def test_copy2D_rect(): resolution = (256, 512) resolutionT = (512, 256) - cl_env = get_opencl_environment(0, DEVICE_NUMBER, 'gpu', PARMES_REAL) + cl_env = get_opencl_environment() vec = 2 src_copy = 'kernels/copy.cl' build_options = "" @@ -111,7 +109,7 @@ def test_copy2D_rect(): def test_copy3D(): resolution = (64, 64, 64) - cl_env = get_opencl_environment(0, DEVICE_NUMBER, 'gpu', PARMES_REAL) + cl_env = get_opencl_environment() vec = 4 src_copy = 'kernels/copy.cl' build_options = "" @@ -153,7 +151,7 @@ def test_copy3D_rect(): resolution_x = (16, 32, 64) resolution_y = (32, 16, 64) resolution_z = (64, 16, 32) - cl_env = get_opencl_environment(0, DEVICE_NUMBER, 'gpu', PARMES_REAL) + cl_env = get_opencl_environment() vec = 4 src_copy = 'kernels/copy.cl' diff --git a/HySoP/hysop/gpu/tests/test_opencl_environment.py b/HySoP/hysop/gpu/tests/test_opencl_environment.py index 25e061d1d6ff4b88a60d5e4e28ad27cae0ac5d2b..0cc2b6aa1ad3aa6ffe664558958878c4858a3bf6 100644 --- a/HySoP/hysop/gpu/tests/test_opencl_environment.py +++ b/HySoP/hysop/gpu/tests/test_opencl_environment.py @@ -8,9 +8,9 @@ def test_queue_unique_creation(): Testing that only one queue is created when multiples calls to get an environment. 
""" - cl_env = get_opencl_environment(0, 0, 'gpu', FLOAT_GPU) + cl_env = get_opencl_environment() cl_env_id = id(cl_env) - cl_envb = get_opencl_environment(0, 0, 'gpu', FLOAT_GPU) + cl_envb = get_opencl_environment() cl_envb_id = id(cl_envb) assert cl_env_id == cl_envb_id @@ -19,7 +19,7 @@ def test_parse_src_expand_floatN(): """ """ import StringIO - cl_env = get_opencl_environment(0, 0, 'gpu', FLOAT_GPU,) + cl_env = get_opencl_environment() str_as_src = """ vstore__N__((float__N__)(gscal_loc[noBC_id(i+__NN__,nb_part)], ), (i + gidY*WIDTH)/__N__, gscal); @@ -41,7 +41,7 @@ def test_parse_src_expand(): """ """ import StringIO - cl_env = get_opencl_environment(0, 0, 'gpu', FLOAT_GPU) + cl_env = get_opencl_environment() str_as_src = """ gvelo_loc[noBC_id(i+__NN__,nb_part)] = v.s__NN__; """ @@ -62,7 +62,7 @@ def test_parse_expand_remeshed_component(): """ """ import StringIO - cl_env = get_opencl_environment(0, 0, 'gpu', FLOAT_GPU) + cl_env = get_opencl_environment() str_as_src = """ __kernel void advection_and_remeshing(__global const float* gvelo, __RCOMP_P__global const float* pscal__ID__, diff --git a/HySoP/hysop/gpu/tests/test_transposition.py b/HySoP/hysop/gpu/tests/test_transposition.py index fd245a3a5d973b7a64ffde136a22b2e6cd85bfe3..9c35c1da5572a2d38a71fec77fef0a90cd4f306c 100644 --- a/HySoP/hysop/gpu/tests/test_transposition.py +++ b/HySoP/hysop/gpu/tests/test_transposition.py @@ -10,7 +10,7 @@ from parmepy.gpu.gpu_kernel import KernelLauncher def test_transposition_xy2D(): resolution = (256, 256) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL) + cl_env = get_opencl_environment() vec = 4 src_transpose_xy = 'kernels/transpose_xy.cl' build_options = "" @@ -63,7 +63,7 @@ def test_transposition_xy2D(): def test_transposition_xy2D_rect(): resolution = (512, 256) resolutionT = (256, 512) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL) + cl_env = get_opencl_environment() vec = 4 src_transpose_xy = 'kernels/transpose_xy.cl' build_options = "" @@ 
-132,7 +132,7 @@ def test_transposition_xy2D_rect(): def test_transposition_xy3D(): resolution = (32, 32, 32) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL) + cl_env = get_opencl_environment() vec = 2 src_transpose_xy = 'kernels/transpose_xy.cl' build_options = "" @@ -184,7 +184,7 @@ def test_transposition_xy3D(): def test_transposition_xy3D_rect(): resolution = (32, 64, 32) resolutionT = (64, 32, 32) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL) + cl_env = get_opencl_environment() vec = 2 src_transpose_xy = 'kernels/transpose_xy.cl' build_options = "" @@ -251,7 +251,7 @@ def test_transposition_xy3D_rect(): def test_transposition_xz3D(): resolution = (32, 32, 32) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL) + cl_env = get_opencl_environment() vec = 1 src_transpose_xz = 'kernels/transpose_xz_noVec.cl' build_options = "" @@ -303,7 +303,7 @@ def test_transposition_xz3D(): def test_transposition_xz3D_rect(): resolution = (32, 32, 64) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL) + cl_env = get_opencl_environment() vec = 1 src_transpose_xz = 'kernels/transpose_xz_noVec.cl' build_options = "" @@ -370,7 +370,7 @@ def test_transposition_xz3D_rect(): def test_transposition_xz3Dslice(): resolution = (32, 32, 32) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL) + cl_env = get_opencl_environment() vec = 1 src_transpose_xz = 'kernels/transpose_xz_slice_noVec.cl' build_options = "" @@ -422,7 +422,7 @@ def test_transposition_xz3Dslice(): def test_transposition_xz3Dslice_rect(): resolution = (32, 32, 64) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL) + cl_env = get_opencl_environment() vec = 1 src_transpose_xz = 'kernels/transpose_xz_slice_noVec.cl' build_options = "" diff --git a/HySoP/hysop/gpu/tools.py b/HySoP/hysop/gpu/tools.py index e6ec65a2942f622f38c485f4f63d8df35d002e6c..e961d30ec62b71dd469a7205f6f858299d3af05d 100644 --- a/HySoP/hysop/gpu/tools.py +++ b/HySoP/hysop/gpu/tools.py @@ -3,8 +3,8 @@ 
Tools for gpu management. """ -from parmepy import __VERBOSE__ -from parmepy.constants import np +from parmepy import __VERBOSE__, __DEFAULT_PLATFORM_ID__, __DEFAULT_DEVICE_ID__ +from parmepy.constants import np, PARMES_REAL from parmepy.gpu import cl, clTools, GPU_SRC, CL_PROFILE import re FLOAT_GPU, DOUBLE_GPU = np.float32, np.float64 @@ -120,15 +120,6 @@ class OpenCLEnvironment(object): Get an OpenCL platform. @param platform_id : OpenCL platform id @return OpenCL platform - - \code - >>> clenv = OpenCLEnvironment(0,0,'gpu', None, None) - >>> clenv._get_platform(0) == cl.get_platforms()[0] - True - >>> clenv._get_platform(-1) == cl.get_platforms()[-1] - True - >>> - \endcode """ try: # OpenCL platform @@ -136,7 +127,7 @@ class OpenCLEnvironment(object): except IndexError: print " Incorrect platform_id :", platform_id, ".", print " Only ", len(cl.get_platforms()), " available.", - print " Getting defalut platform. " + print " Getting default platform. " platform = cl.get_platforms()[0] if __VERBOSE__: print " Platform " @@ -155,16 +146,35 @@ class OpenCLEnvironment(object): Try to use given parameters and in case of fails, use pyopencl context creation function. """ + display = False try: - device = platform.get_devices( - eval("cl.device_type." + device_type.upper()))[device_id] + if device_type is not None: + device = platform.get_devices( + eval("cl.device_type." 
+ str(device_type.upper())) + )[device_id] + else: + device = platform.get_devices()[device_id] except cl.RuntimeError as e: print "RuntimeError:", e device = cl.create_some_context().devices[0] + display = True except AttributeError as e: print "AttributeError:", e device = cl.create_some_context().devices[0] - if __VERBOSE__: + display = True + except IndexError: + print " Incorrect device_id :", device_id, ".", + print " Only ", len(platform.get_devices()), " available.", + if device_type is not None: + print " Getting first device of type " + \ + str(device_type.upper()) + else: + print " Getting first device of the platform" + device = platform.get_devices()[0] + display = True + if device_type is not None: + assert device_type.upper() == cl.device_type.to_string(device.type) + if display or __VERBOSE__: print " Device" print " - Name :", print device.name @@ -508,8 +518,10 @@ class OpenCLEnvironment(object): return buff_list, new_alloc -def get_opengl_shared_environment(platform_id, device_id, device_type, - precision, comm=None): +def get_opengl_shared_environment(platform_id=__DEFAULT_PLATFORM_ID__, + device_id=__DEFAULT_DEVICE_ID__, + device_type=None, precision=PARMES_REAL, + comm=None): """ Get an OpenCL environment with OpenGL shared enable. @@ -531,7 +543,9 @@ def get_opengl_shared_environment(platform_id, device_id, device_type, return __cl_env -def get_opencl_environment(platform_id, device_id, device_type, precision, +def get_opencl_environment(platform_id=None, + device_id=__DEFAULT_DEVICE_ID__, + device_type=None, precision=PARMES_REAL, comm=None): """ Get an OpenCL environment. 
@@ -543,6 +557,10 @@ def get_opencl_environment(platform_id, device_id, device_type, precision, @return OpenCL platform, device, context and queue """ + if platform_id is None: + platform_id = __DEFAULT_PLATFORM_ID__ + if device_id is None: + device_id = __DEFAULT_DEVICE_ID__ global __cl_env if __cl_env is None: __cl_env = OpenCLEnvironment(platform_id, device_id, device_type, diff --git a/HySoP/hysop/operator/monitors/printer.py b/HySoP/hysop/operator/monitors/printer.py index 7b1302eb09af44369d7b5d22a6f9183696e594c5..b6ac1deb1451f2ff2e66eecb51adad24bc14c60f 100644 --- a/HySoP/hysop/operator/monitors/printer.py +++ b/HySoP/hysop/operator/monitors/printer.py @@ -122,7 +122,8 @@ class Printer(Monitoring): if simulation is None: raise ValueError("Missing simulation value for monitoring.") - if simulation.currentIteration % self.frequency == 0: + if simulation.currentIteration == -1 or \ + simulation.currentIteration % self.frequency == 0: # Transfer from GPU to CPU if required for f in self.variables: df = f.discreteFields[self.topo]