From 1b60edf98cd07047efee86913c47cf15d4ad9cec Mon Sep 17 00:00:00 2001 From: Jean-Matthieu Etancelin <jean-matthieu.etancelin@imag.fr> Date: Fri, 25 Oct 2013 08:35:50 +0000 Subject: [PATCH] Multi-CPU/Single-GPU --- Examples/levelSet2D.py | 48 +- Examples/levelSet3D.cl | 2 +- Examples/levelSet3D.py | 43 +- HySoP/hysop/fields/continuous.py | 10 +- HySoP/hysop/fields/discrete.py | 2 +- HySoP/hysop/gpu/gpu_analytic.py | 19 +- HySoP/hysop/gpu/gpu_discrete.py | 87 ++-- HySoP/hysop/gpu/gpu_particle_advection.py | 425 +++++++++++++----- HySoP/hysop/gpu/gpu_particle_advection_1k.py | 144 +++--- HySoP/hysop/gpu/gpu_particle_advection_2k.py | 140 +++--- .../tests/test_advection_randomVelocity.py | 4 +- HySoP/hysop/gpu/tests/test_copy.py | 22 +- .../gpu/tests/test_opencl_environment.py | 15 +- HySoP/hysop/gpu/tests/test_transposition.py | 40 +- HySoP/hysop/gpu/tools.py | 76 +--- HySoP/hysop/operator/advection.py | 169 ++++--- HySoP/hysop/operator/advection_dir.py | 5 +- HySoP/hysop/operator/continuous.py | 4 +- HySoP/hysop/operator/discrete/discrete.py | 2 +- HySoP/hysop/operator/monitors/monitoring.py | 2 +- .../hysop/operator/tests/test_advec_scales.py | 9 + HySoP/hysop/problem/problem.py | 22 +- HySoP/hysop/tools/timers.py | 83 ++-- 23 files changed, 775 insertions(+), 598 deletions(-) diff --git a/Examples/levelSet2D.py b/Examples/levelSet2D.py index aefb16881..8fc124a31 100755 --- a/Examples/levelSet2D.py +++ b/Examples/levelSet2D.py @@ -4,7 +4,7 @@ import parmepy from parmepy.domain.box import Box #from parmepy.gpu import PARMES_REAL_GPU as prec_gpu from parmepy.gpu import PARMES_DOUBLE_GPU as prec_gpu -parmepy.gpu.CL_PROFILE = True +#parmepy.gpu.CL_PROFILE = True from parmepy.fields.continuous import Field from parmepy.operator.advection import Advection from parmepy.problem.transport import TransportProblem @@ -19,11 +19,6 @@ from parmepy.numerics.interpolation import Linear from parmepy.numerics.remeshing import L2_1 as rmsh import math -# def vitesse(x, y): -# vx = -math.sin(x * math.pi) ** 2 * math.sin(y * math.pi * 2) -# vy = math.sin(y * math.pi) ** 2 * math.sin(x * math.pi * 2) -# return vx, vy - def vitesse(x, y, t=0): vx = -math.sin(x * math.pi) ** 2 * math.sin(y * math.pi * 2) * \ @@ -41,23 +36,25 @@ def scalaire(x, y): return 0. - +dim = 2 boxLength = [1., 1.] boxMin = [0., 0.] -nbElem = [2049, ] * 2 +nbElem = [513, ] * 2 timeStep = 0.025 finalTime = 3. 
-outputFilePrefix = './res_2D/levelSet_2D_rect_' -outputModulo = 1 -simu = Simulation(tinit=0.0, tend=finalTime, timeStep=timeStep, iterMax=10000) +outputFilePrefix = './res_2D/levelSet_2D_' +outputModulo = 5 +simu = Simulation(tinit=0.0, tend=finalTime, timeStep=timeStep, iterMax=120) ## Domain box = Box(dim, length=boxLength, origin=boxMin) ## Fields -scal = Field(domain=box, name='Scalar', formula=scalaire) -velo = Field(domain=box, name='Velocity', isVector=True, formula=vitesse) +#scal = Field(domain=box, name='Scalar', formula=scalaire) +#velo = Field(domain=box, name='Velocity', isVector=True, formula=vitesse) +scal = Field(domain=box, name='Scalar') +velo = Field(domain=box, name='Velocity', isVector=True) ## Operators advec = Advection(velo, scal, @@ -66,16 +63,18 @@ advec = Advection(velo, scal, method={TimeIntegrator: RK, Interpolation: Linear, Remesh: rmsh, - Support: '', - Splitting: 'o2_FullHalf'}, - #src=['./levelSet2D.cl'], - #precision=prec_gpu, + Support: 'gpu_2k', + Splitting: 'o2'}, + src=['./levelSet2D.cl'], + precision=prec_gpu, ) +advec.discretize() velocity = Analytic(velo, resolutions={velo: nbElem}, - #method={Support: 'gpu'}, - #src=['./levelSet2D.cl'], - #precision=prec_gpu, + method={Support: 'gpu'}, + src=['./levelSet2D.cl'], + precision=prec_gpu, + topo=advec.advecDir[0].discreteFields[velo].topology ) ##Problem @@ -97,12 +96,3 @@ p._printStep() pb.solve() pb.finalize() - -th, tt = pb.timer.toString() -for op in pb.operators: - oth, ott = op.timer.toString() - th += oth - tt += ott -print th -print tt - diff --git a/Examples/levelSet3D.cl b/Examples/levelSet3D.cl index aaa36a537..63133416f 100644 --- a/Examples/levelSet3D.cl +++ b/Examples/levelSet3D.cl @@ -9,7 +9,7 @@ __kernel void initScalar(__global float* scalar, float4 pos, center=(float4)(0.35, 0.35, 0.35, 0.0); for(i=gidX; i<NB_X; i+=WI_NB) { - pos = (float4)(i*size.x, gidY*size.y, gidZ*size.z, 0.0); + pos = (float4)(i*size.x + minPos.x, gidY*size.y + minPos.y, gidZ*size.z + minPos.z, 0.0); scalar[i+gidY*NB_X+gidZ*NB_X*NB_Y] = ((distance(pos, center)<0.15) ? 1.0 : 0.0); } } diff --git a/Examples/levelSet3D.py b/Examples/levelSet3D.py index fd1a09bfc..2e6fe720a 100755 --- a/Examples/levelSet3D.py +++ b/Examples/levelSet3D.py @@ -14,32 +14,32 @@ from parmepy.numerics.integrators.runge_kutta2 import RK2 as RK #from parmepy.numerics.integrators.runge_kutta4 import RK4 as RK from parmepy.numerics.interpolation import Linear from parmepy.numerics.remeshing import L2_1 as rmsh -# from math import sin, cos, pi, sqrt +from math import sin, cos, pi, sqrt -# def scalaire(x,y,z): -# r = sqrt((x - 0.35) ** 2 + (y - 0.35) ** 2 + (z - 0.35) ** 2) -# if r < 0.15: -# return 1. -# else: -# return 0. +def scalaire(x, y, z): + r = sqrt((x - 0.35) ** 2 + (y - 0.35) ** 2 + (z - 0.35) ** 2) + if r < 0.15: + return 1. + else: + return 0. -# def vitesse(x, y, z, t=0): -# vx = 2. * sin(pi*x)**2*sin(2.*pi*y)*sin(2.*pi*z)*cos(t*pi/3.) -# vy = -sin(2.*pi*x)*sin(pi*y)**2*sin(2.*pi*z)*cos(t*pi/3.) -# vz = -sin(2.*pi*x)*sin(2.*pi*y)*sin(pi*z)**2*cos(t*pi/3.) -# return vx, vy, vz +def vitesse(x, y, z, t=0): + vx = 2. * sin(pi*x)**2*sin(2.*pi*y)*sin(2.*pi*z)*cos(t*pi/3.) + vy = -sin(2.*pi*x)*sin(pi*y)**2*sin(2.*pi*z)*cos(t*pi/3.) + vz = -sin(2.*pi*x)*sin(2.*pi*y)*sin(pi*z)**2*cos(t*pi/3.) + return vx, vy, vz dim = 3 boxLength = [1., 1., 1.] boxMin = [0., 0., 0.] -nbElem = 3 * [65] +nbElem = 3 * [129] timeStep = 0.025 finalTime = 3. 
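# A condensed sketch of the construction pattern both examples follow after
# this patch (based on the levelSet2D.py hunks above): the GPU advection is
# discretized first so that the Analytic velocity operator can be built on the
# very same velocity topology, which is what lets several MPI processes drive
# a single GPU. The helper name is hypothetical; the Analytic import path is
# assumed to match the one already used by the example scripts.
from parmepy.methods_keys import Support
from parmepy.operator.advection import Advection
from parmepy.operator.analytic import Analytic  # assumed import path

def build_gpu_operators(velo, scal, nbElem, prec_gpu, method):
    advec = Advection(velo, scal,
                      resolutions={velo: nbElem, scal: nbElem},
                      method=method,
                      src=['./levelSet2D.cl'], precision=prec_gpu)
    advec.discretize()  # topologies must exist before they can be reused
    velocity = Analytic(velo, resolutions={velo: nbElem},
                        method={Support: 'gpu'},
                        src=['./levelSet2D.cl'], precision=prec_gpu,
                        topo=advec.advecDir[0].discreteFields[velo].topology)
    return advec, velocity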
outputFilePrefix = './res3D/levelSet_3D' -outputModulo = 1 +outputModulo = 5 simu = Simulation(tinit=0.0, tend=finalTime, timeStep=timeStep, iterMax=10000) ## Domain @@ -61,11 +61,13 @@ advec = Advection(velo, scal, src=['./levelSet3D.cl'], precision=gpu_prec ) +advec.discretize() velocity = Analytic(velo, resolutions={velo: nbElem}, method={Support: 'gpu'}, src=['./levelSet3D.cl'], - precision=gpu_prec + precision=gpu_prec, + topo=advec.advecDir[0].discreteFields[velo].topology ) ##Problem @@ -85,14 +87,3 @@ p._printStep() pb.solve() pb.finalize() - -th, tt = pb.timer.toString() -for op in pb.operators: - try: - oth, ott = op.timer.toString() - except AttributeError: - pass - th += oth - tt += ott -print th -print tt diff --git a/HySoP/hysop/fields/continuous.py b/HySoP/hysop/fields/continuous.py index 33c8d4211..34f4390e1 100644 --- a/HySoP/hysop/fields/continuous.py +++ b/HySoP/hysop/fields/continuous.py @@ -7,6 +7,7 @@ from parmepy.constants import debug from parmepy.fields.discrete import DiscreteField #from parmepy.fields.vector import VectorField from parmepy.mpi import main_rank +from parmepy.tools.timers import Timer class Field(object): @@ -80,6 +81,8 @@ class Field(object): ## If set, this topology will be used to initialize the field. self.topoInit = None + ## Object to store computational times + self.timer = Timer(self, ' (' + self.name + ')') @debug def discretize(self, topo): @@ -247,7 +250,6 @@ class Field(object): """ self.topoInit = topo -if __name__ == "__main__": - print __doc__ - print "- Provided class : ContinuousField" - print Field.__doc__ + def finalize(self): + for df in self.discreteFields.values(): + self.timer = self.timer + df.timer diff --git a/HySoP/hysop/fields/discrete.py b/HySoP/hysop/fields/discrete.py index 735bc561e..f49f141b7 100644 --- a/HySoP/hysop/fields/discrete.py +++ b/HySoP/hysop/fields/discrete.py @@ -94,7 +94,7 @@ class DiscreteField(object): ## Scalar or vector self.isVector = isVector ## Object to store computational times - self.timer = Timer(self) + self.timer = Timer(self, ' (' + self.name + ')') ## Number of components of the field if nbComponents is None: self.nbComponents = self.dimension if self.isVector else 1 diff --git a/HySoP/hysop/gpu/gpu_analytic.py b/HySoP/hysop/gpu/gpu_analytic.py index f1bba8b57..5832a863e 100644 --- a/HySoP/hysop/gpu/gpu_analytic.py +++ b/HySoP/hysop/gpu/gpu_analytic.py @@ -3,13 +3,13 @@ Analytic operator on GPU. 
""" from parmepy.operator.discrete.discrete import DiscreteOperator -from parmepy.constants import debug, np +from parmepy.constants import debug, np, PARMES_INDEX, S_DIR from parmepy import __VERBOSE__ from parmepy.gpu.tools import get_opencl_environment from parmepy.gpu.gpu_kernel import KernelLauncher -#from parmepy.gpu import CL_PROFILE from parmepy.tools.timers import timed_function from parmepy.methods_keys import Support +from parmepy.tools.timers import Timer class GPUAnalytic(DiscreteOperator): @@ -49,9 +49,10 @@ class GPUAnalytic(DiscreteOperator): self.resolution = self.field.topology.mesh.resolution ## OpenCL environment self.cl_env = get_opencl_environment(platform_id, device_id, - device_type, precision, - self.resolution) + device_type, precision) self.numMethod = None + ## Object to store computational times of OpenCL kernels + self.kernels_timer = Timer(self) @debug def setUp(self): @@ -84,6 +85,10 @@ class GPUAnalytic(DiscreteOperator): print "=== Kernel sources ===" ## OpenCL compiling options self.build_options = "" + resol = np.ones((3,), dtype=PARMES_INDEX) + resol[:dim] = self.resolution[...] + for i in xrange(3): + self.build_options += " -D NB" + S_DIR[i] + "=" + str(resol[i]) self.build_options += " -D WI_NB=" + str(self.workItemNumber) ## OpenCL Binaries if self.usr_src is not None: @@ -91,8 +96,7 @@ class GPUAnalytic(DiscreteOperator): kernel_name = 'analytic' + self.field.name.rsplit('_', 1)[0] self.numMethod = KernelLauncher(eval('self.prg.' + kernel_name), self.cl_env.queue, - self.gwi, - self.lwi) + self.gwi, self.lwi) @debug @timed_function @@ -105,5 +109,6 @@ class GPUAnalytic(DiscreteOperator): def finalize(self): if self.numMethod is not None and self.numMethod.f_timer is not None: for f_timer in self.numMethod.f_timer: - self.timer.addFunctionTimer(f_timer) + self.kernels_timer.addFunctionTimer(f_timer) + self.timer.addSubTimer(self.kernels_timer, 'Details:') DiscreteOperator.finalize(self) diff --git a/HySoP/hysop/gpu/gpu_discrete.py b/HySoP/hysop/gpu/gpu_discrete.py index 90691c243..86b4dde55 100644 --- a/HySoP/hysop/gpu/gpu_discrete.py +++ b/HySoP/hysop/gpu/gpu_discrete.py @@ -9,6 +9,8 @@ from parmepy.fields.discrete import DiscreteField from parmepy.gpu import cl, clArray, PARMES_REAL_GPU, CL_PROFILE from parmepy.tools.timers import timed_function from parmepy.gpu.gpu_kernel import KernelLauncher, KernelListLauncher +from parmepy.tools.numpywrappers import zeros +from parmepy.mpi.main_var import main_rank, main_size class GPUDiscreteField(DiscreteField): @@ -41,6 +43,7 @@ class GPUDiscreteField(DiscreteField): self._isReleased = False ## OpenCL Buffer pointer self.gpu_data = [None] * self.nbComponents + self._tmp = zeros(np.prod(self.data[0].shape), dtype=precision) ## Data layout is direction dependant self.layout = layout for d in xrange(self.nbComponents): @@ -125,9 +128,8 @@ class GPUDiscreteField(DiscreteField): self.data[d] = np.asarray( self.data[d], dtype=self.precision, order=ORDER) - evts = self.toDevice() - for evt in evts: - evt.wait() + self.toDevice() + self.cl_env.queue.finish() else: if isGPUKernel: self.init_kernel = formula @@ -153,7 +155,7 @@ class GPUDiscreteField(DiscreteField): coord_min, mesh_size, *args) evt.wait() - def release(self): + def finalize(self): if not self._isReleased: if __VERBOSE__: print "deallocate :", self.name, @@ -163,7 +165,6 @@ class GPUDiscreteField(DiscreteField): if self.init_kernel is not None: for f_timer in self.init_kernel.f_timer: self.timer.addFunctionTimer(f_timer) - print self.timer 
self._isReleased = True @timed_function @@ -190,23 +191,32 @@ class GPUDiscreteField(DiscreteField): evt = [None] * self.nbComponents if self.layout: - evt[0] = cl.enqueue_copy( - self.cl_env.queue, self.gpu_data[0].data, - self.data[0].ravel(order=ORDER)) + self._tmp[...] = self.data[0].ravel(order=ORDER)[...] + evt[0] = cl.enqueue_copy(self.cl_env.queue, + self.gpu_data[0].data, self._tmp) if self.nbComponents > 1: - evt[1] = cl.enqueue_copy( - self.cl_env.queue, self.gpu_data[1].data, - self.data[1].swapaxes(0, 1).ravel(order=ORDER)) + self._tmp[...] = \ + self.data[1].swapaxes(0, 1).ravel(order=ORDER)[...] + evt[1] = cl.enqueue_copy(self.cl_env.queue, + self.gpu_data[1].data, self._tmp) if self.nbComponents > 2: - evt[2] = cl.enqueue_copy( - self.cl_env.queue, self.gpu_data[2].data, - self.data[2].swapaxes(0, 1).swapaxes(0, 2).ravel( - order=ORDER)) + #layout on device is ZXY in sequential + # and ZYX in parallel + if main_size == 1: + self._tmp[...] = \ + self.data[2].swapaxes(0, 1).swapaxes(0, 2).ravel( + order=ORDER)[...] + else: + self._tmp[...] = \ + self.data[2].swapaxes(0, 2).ravel(order=ORDER)[...] + evt[2] = cl.enqueue_copy(self.cl_env.queue, + self.gpu_data[2].data, self._tmp) else: for d in xrange(self.nbComponents): + self._tmp[...] = self.data[d].ravel(order=ORDER)[...] evt[d] = cl.enqueue_copy( self.cl_env.queue, self.gpu_data[d].data, - self.data[d].ravel(order=ORDER)) + self._tmp) if CL_PROFILE: self.cl_env.queue.finish() for e in evt: @@ -217,6 +227,7 @@ class GPUDiscreteField(DiscreteField): print self.mem_size, "Bytes transfered at ", print "{0:.3f} GBytes/sec".format( self.mem_size / (time * 1024 ** 3)) + self.cl_env.queue.finish() return evt @timed_function @@ -238,30 +249,46 @@ class GPUDiscreteField(DiscreteField): evt = [None] * self.nbComponents if self.layout: # Get XDIR component - evt[0] = cl.enqueue_copy(self.cl_env.queue, self.data[0], + evt[0] = cl.enqueue_copy(self.cl_env.queue, self._tmp, self.gpu_data[0].data) + self.cl_env.queue.finish() + self.data[0][...] = self._tmp.reshape( + self.data[0].shape, order=ORDER)[...] if self.nbComponents > 1: - # Get YDIR component - temp = np.empty_like(self.data[1]) - if self.dimension == 3: - temp.resize((shape[1], shape[0], shape[2])) - else: - temp.resize((shape[1], shape[0])) - evt[1] = cl.enqueue_copy(self.cl_env.queue, temp, + evt[1] = cl.enqueue_copy(self.cl_env.queue, self._tmp, self.gpu_data[1].data) - self.data[1] = temp.swapaxes(0, 1) + self.cl_env.queue.finish() + shape = list(self.data[1].shape) + shape[0] = self.data[1].shape[1] + shape[1] = self.data[1].shape[0] + self.data[1][...] = self._tmp.reshape( + tuple(shape), order=ORDER).swapaxes(0, 1)[...] if self.nbComponents > 2: # Get ZDIR component - temp = np.empty_like(self.data[2]) - temp.resize((shape[2], shape[0], shape[1])) - evt[2] = cl.enqueue_copy(self.cl_env.queue, temp, + evt[2] = cl.enqueue_copy(self.cl_env.queue, self._tmp, self.gpu_data[2].data) - self.data[2] = temp.swapaxes(0, 2).swapaxes(0, 1) + self.cl_env.queue.finish() + shape = list(self.data[2].shape) + if main_size == 1: + shape[0] = self.data[2].shape[2] + shape[1] = self.data[2].shape[0] + shape[2] = self.data[2].shape[1] + self.data[2][...] = self._tmp.reshape( + tuple(shape), + order=ORDER).swapaxes(0, 2).swapaxes(0, 1)[...] + else: + shape[0] = self.data[2].shape[2] + shape[2] = self.data[2].shape[0] + self.data[2][...] = self._tmp.reshape( + tuple(shape), order=ORDER).swapaxes(0, 2)[...] 
else: for d in xrange(self.nbComponents): # Get d component - evt[d] = cl.enqueue_copy(self.cl_env.queue, self.data[d], + evt[d] = cl.enqueue_copy(self.cl_env.queue, self._tmp, self.gpu_data[d].data) + self.cl_env.queue.finish() + self.data[d][...] = self._tmp.reshape( + self.data[d].shape, order=ORDER)[...] if CL_PROFILE: self.cl_env.queue.finish() for e in evt: diff --git a/HySoP/hysop/gpu/gpu_particle_advection.py b/HySoP/hysop/gpu/gpu_particle_advection.py index 549ee39d9..322e0e978 100644 --- a/HySoP/hysop/gpu/gpu_particle_advection.py +++ b/HySoP/hysop/gpu/gpu_particle_advection.py @@ -5,7 +5,7 @@ Discrete advection representation """ from abc import ABCMeta, abstractmethod from parmepy import __VERBOSE__ -from parmepy.constants import np, debug +from parmepy.constants import np, debug, PARMES_INDEX, S_DIR from parmepy.methods_keys import TimeIntegrator, Interpolation, Remesh, \ Support, Splitting from parmepy.numerics.integrators.runge_kutta2 import RK2 @@ -16,6 +16,8 @@ from parmepy.gpu import PARMES_REAL_GPU, cl from parmepy.gpu.tools import get_opencl_environment from parmepy.gpu.gpu_kernel import KernelLauncher from parmepy.tools.timers import Timer +from parmepy.mpi.main_var import main_size +from parmepy.tools.timers import timed_function class GPUParticleAdvection(ParticleAdvection): @@ -60,39 +62,60 @@ class GPUParticleAdvection(ParticleAdvection): """ ParticleAdvection.__init__(self, velocity, advectedFields, d, part_position, part_advectedFields, method) - self.method = [method[TimeIntegrator].__name__, - method[Remesh].__name__] - print self.method + self.method = method self.user_gpu_src = src self.num_method = None self.dim = self.advectedFields[0].dimension + self.cl_env = get_opencl_environment(platform_id, device_id, + device_type, precision) # Compute resolutions for kernels for each direction. resol = self.advectedFields[0].topology.mesh.resolution - self.resolution = resol.copy() - if self.dim == 2: - if self.dir == 1: - self.resolution[0] = resol[1] - self.resolution[1] = resol[0] - else: - if self.dir == 1: - self.resolution[0] = resol[1] - self.resolution[1] = resol[0] - self.resolution[2] = resol[2] - if self.dir == 2: - self.resolution[0] = resol[2] - self.resolution[1] = resol[0] - self.resolution[2] = resol[1] + # Resolution of the local mesh + self.resol = np.ones((3,), dtype=PARMES_INDEX) + # Resolution of the local mesh but reorganized according to + # the splitting direction: + # direction X : XYZ + # direction Y : YXZ + # direction Z : ZYX in parallel, ZXY in sequential + self.resol_dir = np.ones((3,), dtype=PARMES_INDEX) + self.resol[:self.dim] = resol[...] + self.resol_dir[:self.dim] = resol[...]
+ if self.dir == 1: # YXZ + self.resol_dir[0] = self.resol[1] + self.resol_dir[1] = self.resol[0] + elif self.dir == 2: + if main_size == 1 and self.method[Splitting].find('o2') >= 0: + # ZXY (XY and XZ transpositions are consecutive) + self.resol_dir[0] = self.resol[2] + self.resol_dir[1] = self.resol[0] + self.resol_dir[2] = self.resol[1] + else: # ZYX + self.resol_dir[0] = self.resol[2] + self.resol_dir[2] = self.resol[0] + + # Size constants for local mesh size + self._size_constants = "" + for i in xrange(3): + self._size_constants += " -D NB" + S_DIR[i] + self._size_constants += "=" + str(self.resol[i]) + # Direction dependent constants + self._constants = [" -D NB_I=NB_X -D NB_II=NB_Y -D NB_III=NB_Z", + " -D NB_I=NB_Y -D NB_II=NB_X -D NB_III=NB_Z", + " -D NB_I=NB_Z -D NB_II=NB_Y -D NB_III=NB_X"] + if main_size == 1 and self.method[Splitting].find('o2') >= 0: + self._constants[2] = \ + " -D NB_I=NB_Z -D NB_II=NB_X -D NB_III=NB_Y" - self.cl_env = get_opencl_environment(platform_id, device_id, - device_type, precision, - resol) - self.warning = True self.prg = None ## Object to store computational times of OpenCL kernels self.kernels_timer = Timer(self) self._num_locMem = None + self._kernel_cfg = self.cl_env.kernels_config[self.dim][precision] + self.copy = None + self.transpose_xy, self.transpose_xy_r = None, None + self.transpose_xz, self.transpose_xz_r = None, None @debug def setUp(self): @@ -107,7 +130,7 @@ class GPUParticleAdvection(ParticleAdvection): """ self.gpu_precision = self.cl_env.precision self.workItemNumber, self.gwi, self.lwi = self.cl_env.get_WorkItems( - self.resolution) + self.resol_dir) if __VERBOSE__: print "Work-items basic config : ", print self.workItemNumber, self.gwi, self.lwi @@ -122,7 +145,9 @@ class GPUParticleAdvection(ParticleAdvection): self.build_options = "" self._collect_usr_cl_src(self.user_gpu_src) self._collect_kernels_cl_src_copy() - self._collect_kernels_cl_src_transpositions() + self._collect_kernels_cl_src_transpositions_xy() + if self.dim == 3: + self._collect_kernels_cl_src_transpositions_xz() self._collect_kernels_cl_src() if __VERBOSE__: print "=== OpenCL Buffer allocations ===" @@ -132,9 +157,121 @@ class GPUParticleAdvection(ParticleAdvection): print "=== OpenCL Buffer initialisation ===" if self.dir == 0: self._buffer_initialisations() + + # Branch to the proper _compute function + if self.part_advectedFields[0].nbComponents == 1: + self._compute = self._compute_1c + elif self.part_advectedFields[0].nbComponents == 2: + self._compute = self._compute_2c + elif self.part_advectedFields[0].nbComponents == 3: + self._compute = self._compute_3c + + # Build execution list regarding splitting: + # In sequential configuration, data can stay in their direction + # dependent layout between directions. In parallel, data must be + # redistributed (between directions) in XYZ layout.
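# A minimal standalone sketch of the resolution permutation described in the
# comments above: the local resolution is reordered so that kernels always see
# the current splitting direction first (XYZ for X, YXZ for Y, and for Z either
# ZXY on a single process with 'o2' splitting or ZYX in parallel). setUp() then
# records, for each splitting step, the kernels to run (copy or transposition,
# then _compute), and apply() simply executes exec_list[split_id]. The helper
# below is hypothetical and only mirrors the resol_dir assignments; 2D
# resolutions are padded with 1, as the class does with np.ones((3,)).
import numpy as np

def resolution_in_direction(resolution, direction, sequential, o2_splitting=True):
    r = np.ones((3,), dtype=int)
    r[:len(resolution)] = resolution
    if direction == 1:                       # YXZ
        perm = (1, 0, 2)
    elif direction == 2:
        if sequential and o2_splitting:      # ZXY
            perm = (2, 0, 1)
        else:                                # ZYX
            perm = (2, 1, 0)
    else:                                    # XYZ
        perm = (0, 1, 2)
    return r[list(perm)]

# Example: a 65x129x257 local mesh seen from the Z direction in parallel:
# resolution_in_direction((65, 129, 257), 2, sequential=False) -> [257, 129, 65]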
+ # Splitting Strang 2nd order: + # 3D: X(dt/2), Y(dt/2), Z(dt), Y(dt/2), X(dt/2) + # 2D: X(dt/2), Y(dt), X(dt/2) + if self.method[Splitting] == 'o2': + if main_size > 1: + if self.dim == 2: + self.exec_list = [ + [self._init_copy, self._compute], # X(dt/2) + [self._init_transpose_xy, self._compute, # Y(dt) + self._init_transpose_xy_r, self._init_copy_r], + [self._init_copy, self._compute] # X(dt/2) + ] + elif self.dim == 3: + self.exec_list = [ + [self._init_copy, self._compute], # X(dt/2) + [self._init_transpose_xy, self._compute, # Y(dt/2) + self._init_transpose_xy_r, self._init_copy_r], + [self._init_transpose_xz, self._compute, # Z(dt) + self._init_transpose_xz_r, self._init_copy_r], + [self._init_transpose_xy, self._compute, # Y(dt/2) + self._init_transpose_xy_r, self._init_copy_r], + [self._init_copy, self._compute] # X(dt/2) + ] + else: + if self.dim == 2: + self.exec_list = [ + [self._init_copy, self._compute], # X(dt/2) + [self._init_transpose_xy, self._compute], # Y(dt) + [self._init_transpose_xy, self._compute] # X(dt/2) + ] + elif self.dim == 3: + self.exec_list = [ + [self._init_copy, self._compute], # X(dt/2) + [self._init_transpose_xy, self._compute], # Y(dt/2) + [self._init_transpose_xz, self._compute], # Z(dt) + [self._init_transpose_xz, self._compute], # Y(dt/2) + [self._init_transpose_xy, self._compute] # X(dt/2) + ] + + # Splitting Strang 2nd order (fullHalf): + # X(dt/2), Y(dt/2), Z(dt/2), Z(dt/2), Y(dt/2), X(dt/2) + elif self.method[Splitting] == 'o2_FullHalf': + if main_size > 1: + if self.dim == 2: + self.exec_list = [ + [self._init_copy, self._compute], # X(dt/2) + [self._init_transpose_xy, self._compute], # Y(dt/2) + [self._init_copy, self._compute, # Y(dt/2) + self._init_transpose_xy_r, self._init_copy_r], + [self._init_copy, self._compute] # X(dt/2) + ] + elif self.dim == 3: + self.exec_list = [ + [self._init_copy, self._compute], # X(dt/2) + [self._init_transpose_xy, self._compute, # Y(dt/2) + self._init_transpose_xy_r, self._init_copy_r], + [self._init_transpose_xz, self._compute], # Z(dt/2) + [self._init_copy, self._compute, # Z(dt/2) + self._init_transpose_xz_r, self._init_copy_r], + [self._init_transpose_xy, self._compute, # Y(dt/2) + self._init_transpose_xy_r, self._init_copy_r], + [self._init_copy, self._compute] # X(dt/2) + ] + else: + if self.dim == 2: + self.exec_list = [ + [self._init_copy, self._compute], # X(dt/2) + [self._init_transpose_xy, self._compute], # Y(dt) + [self._init_copy, self._compute], # Y(dt) + [self._init_transpose_xy, self._compute] # X(dt/2) + ] + elif self.dim == 3: + self.exec_list = [ + [self._init_copy, self._compute], # X(dt/2) + [self._init_transpose_xy, self._compute], # Y(dt/2) + [self._init_transpose_xz, self._compute], # Z(dt/2) + [self._init_copy, self._compute], # Z(dt/2) + [self._init_transpose_xz, self._compute], # Y(dt/2) + [self._init_transpose_xy, self._compute] # X(dt/2) + ] + else: + raise ValueError('Not yet implemeted Splitting on GPU : ' + + self.method[Splitting]) + if __VERBOSE__: print "===\n" + def _collect_kernels_cl_src(self): + pass + + def _buffer_allocations(self): + raise ValueError('This method must be implemented in subclasses.') + + def _compute_1c(self): + raise ValueError('This method must be implemented in subclasses.') + + def _compute_2c(self): + raise ValueError('This method must be implemented in subclasses.') + + def _compute_3c(self): + raise ValueError('This method must be implemented in subclasses.') + def _buffer_initialisations(self): """ OpenCL buffer initializations from user 
OpenCL kernels. @@ -159,59 +296,121 @@ class GPUParticleAdvection(ParticleAdvection): """ build_options = self.build_options # copy settings - src, t_dim, b_rows, vec, f_space = \ - self.cl_env.kernels_config[self.dim][self.gpu_precision]['copy'] - while t_dim > self.resolution[0]: + src, t_dim, b_rows, vec, f_space = self._kernel_cfg['copy'] + while t_dim > self.resol_dir[0]: t_dim /= 2 - gwi, lwi = f_space(self.resolution, t_dim, b_rows, vec) - + gwi, lwi = f_space(self.resol_dir, t_dim, b_rows, vec) # Build code build_options += " -D TILE_DIM_COPY={0}".format(t_dim) build_options += " -D BLOCK_ROWS_COPY={0}".format(b_rows) + build_options += self._size_constants + self._constants[self.dir] prg = self.cl_env.build_src( src, - build_options + self.cl_env.default_sizes_constants[self.dir], + build_options, vec) - self.init_copy = KernelLauncher(prg.copy, - self.cl_env.queue, gwi, lwi) + self.copy = KernelLauncher(prg.copy, + self.cl_env.queue, gwi, lwi) - def _collect_kernels_cl_src_transpositions(self): + def _collect_kernels_cl_src_transpositions_xy(self): """Compile OpenCL sources for transpositions kernel. @remark : Transpositions kernels are launched as initialization. Arrays are taken in their destination layout (for initialize in Y directions, either we came from X or Z but shapes are the Y ones). """ + resol = self.advectedFields[0].topology.mesh.resolution + resol_tmp = np.empty_like(resol) + # XY transposition settings - build_options = self.build_options - src, t_dim, b_rows, is_padding, vec, f_space = \ - self.cl_env.kernels_config[self.dim][self.gpu_precision]['transpose_xy'] - while t_dim > self.resolution[0] or t_dim > self.resolution[1]: - t_dim /= 2 - gwi, lwi = f_space(self.resolution, t_dim, b_rows, vec) + is_XY_needed = self.dir == 1 or (self.dir == 0 and main_size == 1) + if is_XY_needed: + resol_tmp[...] = resol[...] + if self.dir == 1: # (XY -> YX) + resol_tmp[0] = resol[1] + resol_tmp[1] = resol[0] + ocl_cte = " -D NB_I=NB_Y -D NB_II=NB_X -D NB_III=NB_Z" + elif self.dir == 0: # (YX -> XY) only for sequential + ocl_cte = " -D NB_I=NB_X -D NB_II=NB_Y -D NB_III=NB_Z" + build_options = self.build_options + self._size_constants + src, t_dim, b_rows, is_padding, vec, f_space = \ + self._kernel_cfg['transpose_xy'] + while t_dim > resol_tmp[0] or t_dim > resol_tmp[1]: + t_dim /= 2 + gwi, lwi = f_space(resol_tmp, t_dim, b_rows, vec) - if is_padding: - build_options += " -D PADDING_XY=1" - else: - build_options += " -D PADDING_XY=0" - build_options += " -D TILE_DIM_XY={0}".format(t_dim) - build_options += " -D BLOCK_ROWS_XY={0}".format(b_rows) - prg = self.cl_env.build_src( - src, - build_options + self.cl_env.default_sizes_constants[self.dir], vec) + if is_padding: + build_options += " -D PADDING_XY=1" + else: + build_options += " -D PADDING_XY=0" + build_options += " -D TILE_DIM_XY={0}".format(t_dim) + build_options += " -D BLOCK_ROWS_XY={0}".format(b_rows) + build_options += ocl_cte + prg = self.cl_env.build_src( + src, + build_options, vec) - self.init_transpose_xy = KernelLauncher( - prg.transpose_xy, self.cl_env.queue, gwi, lwi) + self.transpose_xy = KernelLauncher( + prg.transpose_xy, self.cl_env.queue, gwi, lwi) - if self.dim == 3: - # XY transposition settings - build_options = self.build_options + is_XY_r_needed = self.dir == 1 and main_size > 1 + if is_XY_r_needed: + # Reversed XY transposition settings (YX -> XY), only in parallel + resol_tmp[...] = resol[...] 
+ ocl_cte = " -D NB_I=NB_X -D NB_II=NB_Y -D NB_III=NB_Z" + + build_options = self.build_options + self._size_constants + src, t_dim, b_rows, is_padding, vec, f_space = \ + self._kernel_cfg['transpose_xy'] + while t_dim > resol_tmp[0] or t_dim > resol_tmp[1]: + t_dim /= 2 + gwi, lwi = f_space(resol_tmp, t_dim, b_rows, vec) + + if is_padding: + build_options += " -D PADDING_XY=1" + else: + build_options += " -D PADDING_XY=0" + build_options += " -D TILE_DIM_XY={0}".format(t_dim) + build_options += " -D BLOCK_ROWS_XY={0}".format(b_rows) + build_options += ocl_cte + prg = self.cl_env.build_src( + src, + build_options, vec) + + self.transpose_xy_r = KernelLauncher( + prg.transpose_xy, self.cl_env.queue, gwi, lwi) + + def _collect_kernels_cl_src_transpositions_xz(self): + resol = self.advectedFields[0].topology.mesh.resolution + resol_tmp = np.empty_like(resol) + + is_XZ_needed = self.dir == 2 or (self.dir == 1 and main_size == 1) + # XZ transposition settings + if is_XZ_needed: + resol_tmp[...] = resol[...] + if self.dir == 1: # ZXY -> YXZ (only for seqential) + resol_tmp[0] = resol[1] + resol_tmp[1] = resol[0] + resol_tmp[2] = resol[2] + ocl_cte = " -D NB_I=NB_Y -D NB_II=NB_X -D NB_III=NB_Z" + elif self.dir == 2: + if main_size == 1: # YXZ -> ZXY + resol_tmp[0] = resol[2] + resol_tmp[1] = resol[0] + resol_tmp[2] = resol[1] + ocl_cte = " -D NB_I=NB_Z -D NB_II=NB_X -D NB_III=NB_Y" + else: # XYZ -> ZYX + resol_tmp[0] = resol[2] + resol_tmp[1] = resol[1] + resol_tmp[2] = resol[0] + ocl_cte = " -D NB_I=NB_Z -D NB_II=NB_Y -D NB_III=NB_X" + + build_options = self.build_options + self._size_constants src, t_dim, b_rows, b_deph, is_padding, vec, f_space = \ - self.cl_env.kernels_config[self.dim][self.gpu_precision]['transpose_xz'] + self._kernel_cfg['transpose_xz'] - while t_dim > self.resolution[0] or t_dim > self.resolution[2]: + while t_dim > resol_tmp[0] or t_dim > resol_tmp[2]: t_dim /= 2 - gwi, lwi = f_space(self.resolution, t_dim, b_rows, b_deph, vec) + gwi, lwi = f_space(resol_tmp, t_dim, b_rows, b_deph, vec) if is_padding: build_options += " -D PADDING_XZ=1" else: @@ -219,11 +418,39 @@ class GPUParticleAdvection(ParticleAdvection): build_options += " -D TILE_DIM_XZ={0}".format(t_dim) build_options += " -D BLOCK_ROWS_XZ={0}".format(b_rows) build_options += " -D BLOCK_DEPH_XZ={0}".format(b_deph) + build_options += ocl_cte prg = self.cl_env.build_src( src, - build_options + self.cl_env.default_sizes_constants[self.dir], + build_options, vec) - self.init_transpose_xz = KernelLauncher( + self.transpose_xz = KernelLauncher( + prg.transpose_xz, self.cl_env.queue, gwi, lwi) + + is_XZ_r_needed = self.dir == 2 and main_size > 1 + if is_XZ_r_needed: + # Reversed XZ transposition settings (ZYX -> XYZ) + resol_tmp[...] = resol[...] 
+ ocl_cte = " -D NB_I=NB_X -D NB_II=NB_Y -D NB_III=NB_Z" + build_options = self.build_options + self._size_constants + src, t_dim, b_rows, b_deph, is_padding, vec, f_space = \ + self._kernel_cfg['transpose_xz'] + + while t_dim > resol_tmp[0] or t_dim > resol_tmp[2]: + t_dim /= 2 + gwi, lwi = f_space(resol_tmp, t_dim, b_rows, b_deph, vec) + if is_padding: + build_options += " -D PADDING_XZ=1" + else: + build_options += " -D PADDING_XZ=0" + build_options += " -D TILE_DIM_XZ={0}".format(t_dim) + build_options += " -D BLOCK_ROWS_XZ={0}".format(b_rows) + build_options += " -D BLOCK_DEPH_XZ={0}".format(b_deph) + build_options += ocl_cte + prg = self.cl_env.build_src( + src, + build_options, + vec) + self.transpose_xz_r = KernelLauncher( prg.transpose_xz, self.cl_env.queue, gwi, lwi) def _collect_usr_cl_src(self, usr_src): @@ -231,12 +458,13 @@ class GPUParticleAdvection(ParticleAdvection): Build user sources. """ - build_options = self.build_options + build_options = self.build_options + self._size_constants build_options += " -D WI_NB=" + str(self.workItemNumber) if usr_src is not None: self.prg = self.cl_env.build_src(usr_src, build_options) @debug + @timed_function def apply(self, simulation, dtCoeff, split_id, old_dir): """ Apply operator along specified splitting direction. @@ -245,52 +473,49 @@ class GPUParticleAdvection(ParticleAdvection): @param d : Splitting direction @param split_id : Splitting step id """ - dim = self.advectedFields[0].dimension - self.cl_env.queue.finish() - # Particle init - if old_dir == self.dir: - for p, g in zip(self.advectedFields[0].gpu_data, - self.part_advectedFields[0].gpu_data): - self.init_copy(p.data, g.data) - else: - if dim == 2: - for p, g in zip(self.advectedFields[0].gpu_data, - self.part_advectedFields[0].gpu_data): - self.init_transpose_xy(p.data, g.data) - else: - if min(old_dir, self.dir) == 0: - for p, g in zip(self.advectedFields[0].gpu_data, - self.part_advectedFields[0].gpu_data): - self.init_transpose_xy(p.data, g.data) - else: - for p, g in zip(self.advectedFields[0].gpu_data, - self.part_advectedFields[0].gpu_data): - self.init_transpose_xz(p.data, g.data) + for exe in self.exec_list[split_id]: + exe(simulation, dtCoeff, split_id, old_dir) + self.cl_env.queue.finish() + + def _init_copy(self, simulation, dtCoeff, split_id, old_dir): + for p, g in zip(self.advectedFields[0].gpu_data, + self.part_advectedFields[0].gpu_data): + self.copy(p.data, g.data) + + def _init_copy_r(self, simulation, dtCoeff, split_id, old_dir): + for p, g in zip(self.advectedFields[0].gpu_data, + self.part_advectedFields[0].gpu_data): + self.copy(g.data, p.data) + + def _init_transpose_xy(self, simulation, dtCoeff, split_id, old_dir): + for p, g in zip(self.advectedFields[0].gpu_data, + self.part_advectedFields[0].gpu_data): + self.transpose_xy(p.data, g.data) + + def _init_transpose_xy_r(self, simulation, dtCoeff, split_id, old_dir): + for p, g in zip(self.advectedFields[0].gpu_data, + self.part_advectedFields[0].gpu_data): + self.transpose_xy_r(p.data, g.data) + + def _init_transpose_xz(self, simulation, dtCoeff, split_id, old_dir): + for p, g in zip(self.advectedFields[0].gpu_data, + self.part_advectedFields[0].gpu_data): + self.transpose_xz(p.data, g.data) + + def _init_transpose_xz_r(self, simulation, dtCoeff, split_id, old_dir): + for p, g in zip(self.advectedFields[0].gpu_data, + self.part_advectedFields[0].gpu_data): + self.transpose_xz_r(p.data, g.data) @debug def finalize(self): """ Free OpenCL memory. 
""" - for gpudf in self.variables: - gpudf.release() - if self.init_copy.f_timer is not None: - for f_timer in self.init_copy.f_timer: - self.kernels_timer.addFunctionTimer(f_timer) - for f_timer in self.init_transpose_xy.f_timer: - self.kernels_timer.addFunctionTimer(f_timer) - if len(self.resolution) == 3: - for f_timer in self.init_transpose_xz.f_timer: + for k in [self.copy, self.transpose_xy, self.transpose_xy_r, + self.transpose_xy, self.transpose_xy_r]: + if k is not None: + for f_timer in k.f_timer: self.kernels_timer.addFunctionTimer(f_timer) + self.timer.addSubTimer(self.kernels_timer, 'Details:') ParticleAdvection.finalize(self) - self.timer.addSubTimer(self.kernels_timer, ' Details:') - - def __str__(self): - s = "GPUParticleAdvection (DiscreteOperator). " - s += GPUParticleAdvection.__str__(self) - return s - -if __name__ == "__main__": - print __doc__ - print "- Provided class : GPUParticleAdvection" - print GPUParticleAdvection.__doc__ diff --git a/HySoP/hysop/gpu/gpu_particle_advection_1k.py b/HySoP/hysop/gpu/gpu_particle_advection_1k.py index b98aeb992..e3f6b47ea 100644 --- a/HySoP/hysop/gpu/gpu_particle_advection_1k.py +++ b/HySoP/hysop/gpu/gpu_particle_advection_1k.py @@ -14,7 +14,6 @@ from parmepy.fields.continuous import Field from parmepy.gpu.gpu_discrete import GPUDiscreteField from parmepy.gpu import PARMES_REAL_GPU from parmepy.gpu.gpu_kernel import KernelLauncher -from parmepy.tools.timers import timed_function class GPUParticleAdvection1k(GPUParticleAdvection): @@ -59,6 +58,7 @@ class GPUParticleAdvection1k(GPUParticleAdvection): device_type, method, src, precision) + self.num_advec_and_remesh = None @debug def setUp(self): @@ -105,109 +105,79 @@ class GPUParticleAdvection1k(GPUParticleAdvection): """ Compile OpenCL sources for advection and remeshing kernel. 
""" - build_options = self.build_options - src, is_noBC, vec, f_space = \ - self.cl_env.kernels_config[self.dim][self.gpu_precision]['advec_and_remesh'] - gwi, lwi = f_space(self.resolution, vec) + build_options = self.build_options + self._size_constants + src, is_noBC, vec, f_space = self._kernel_cfg['advec_and_remesh'] + gwi, lwi = f_space(self.resol_dir, vec) WINb = lwi[0] - build_options += " -D FORMULA=" + self.method[1].upper() + build_options += " -D FORMULA=" + self.method[Remesh].__name__.upper() if is_noBC: build_options += " -D WITH_NOBC=1" - build_options_d = " -D WI_NB=" + str(WINb) - build_options_d += " -D PART_NB_PER_WI=" - build_options_d += str(self.resolution[0] / WINb) - build_options_d += self.cl_env.default_sizes_constants[self.dir] + build_options += " -D WI_NB=" + str(WINb) + build_options += " -D PART_NB_PER_WI=" + build_options += str(self.resol_dir[0] / WINb) + build_options += self._constants[self.dir] ## Build code - src = [s.replace('RKN', self.method[0].lower()) for s in src] + src = [s.replace('RKN', self.method[TimeIntegrator].__name__.lower()) + for s in src] prg = self.cl_env.build_src( - src, build_options + build_options_d, vec, + src, build_options, vec, nb_remesh_components=self.part_advectedFields[0].nbComponents) self.num_advec_and_remesh = KernelLauncher( prg.advection_and_remeshing, self.cl_env.queue, gwi, lwi) ## Local memory array for kernels - self._num_locMem = [self.cl_env.LocalMemAllocator(self.resolution[0]), - self.cl_env.LocalMemAllocator( - self.resolution[0], reuse=False)] + self._num_locMem = [ + self.cl_env.LocalMemAllocator(self.resol_dir[0]), + self.cl_env.LocalMemAllocator(self.resol_dir[0], reuse=False)] if self.part_advectedFields[0].nbComponents >= 2: - self._num_locMem.append(self.cl_env.LocalMemAllocator( - self.resolution[0], reuse=False)) + self._num_locMem.append( + self.cl_env.LocalMemAllocator(self.resol_dir[0], reuse=False)) if self.part_advectedFields[0].nbComponents == 3: - self._num_locMem.append(self.cl_env.LocalMemAllocator( - self.resolution[0], reuse=False)) + self._num_locMem.append( + self.cl_env.LocalMemAllocator(self.resol_dir[0], reuse=False)) - @debug - @timed_function - def apply(self, simulation, dtCoeff, split_id, old_dir): - """ - Apply advection operator. - - @param t : current time. - @param dt : time step. - @param d : Direction of splitting. - - Advection algorithm: - @li 1. Particle initialization : \n - - by copy scalar from grid to particles if previous splitting - direction equals current splitting direction.\n - - by transposition of scalar from grid to particle. - @li 2. Particle advection :\n - - compute particle position in splitting direction as a - scalar. Performs a RK2 resolution of dx_p/dt = a_p. - @li 3. Profile timings of OpenCL kernels. 
- """ - GPUParticleAdvection.apply(self, simulation, dtCoeff, - split_id, old_dir) + def _compute_1c(self, simulation, dtCoeff, split_id, old_dir): dt = simulation.timeStep * dtCoeff - # Advection - self.cl_env.queue.finish() - if self.part_advectedFields[0].nbComponents == 3: - self.num_advec_and_remesh( - self.velocity.gpu_data[self.dir].data, - self.part_advectedFields[0].gpu_data[0].data, - self.part_advectedFields[0].gpu_data[1].data, - self.part_advectedFields[0].gpu_data[2].data, - self.advectedFields[0].gpu_data[0].data, - self.advectedFields[0].gpu_data[1].data, - self.advectedFields[0].gpu_data[2].data, - self._num_locMem[0], self._num_locMem[1], - self._num_locMem[2], self._num_locMem[3], - self.gpu_precision(dt), - self.coord_min[self.dir], - self.mesh_size[self.dir]) - elif self.part_advectedFields[0].nbComponents == 2: - self.num_advec_and_remesh( - self.velocity.gpu_data[self.dir].data, - self.part_advectedFields[0].gpu_data[0].data, - self.part_advectedFields[0].gpu_data[1].data, - self.advectedFields[0].gpu_data[0].data, - self.advectedFields[0].gpu_data[1].data, - self._num_locMem[0], self._num_locMem[1], self._num_locMem[2], - self.gpu_precision(dt), - self.coord_min[self.dir], - self.mesh_size[self.dir]) - else: - self.num_advec_and_remesh( - self.velocity.gpu_data[self.dir].data, - self.part_advectedFields[0].gpu_data[0].data, - self.advectedFields[0].gpu_data[0].data, - self._num_locMem[0], self._num_locMem[1], - self.gpu_precision(dt), - self.coord_min[self.dir], - self.mesh_size[self.dir]) + self.num_advec_and_remesh( + self.velocity.gpu_data[self.dir].data, + self.part_advectedFields[0].gpu_data[0].data, + self.advectedFields[0].gpu_data[0].data, + self._num_locMem[0], self._num_locMem[1], + self.gpu_precision(dt), + self.coord_min[self.dir], + self.mesh_size[self.dir]) + + def _compute_2c(self, simulation, dtCoeff, split_id, old_dir): + dt = simulation.timeStep * dtCoeff + self.num_advec_and_remesh( + self.velocity.gpu_data[self.dir].data, + self.part_advectedFields[0].gpu_data[0].data, + self.part_advectedFields[0].gpu_data[1].data, + self.advectedFields[0].gpu_data[0].data, + self.advectedFields[0].gpu_data[1].data, + self._num_locMem[0], self._num_locMem[1], self._num_locMem[2], + self.gpu_precision(dt), + self.coord_min[self.dir], + self.mesh_size[self.dir]) + + def _compute_3c(self, simulation, dtCoeff, split_id, old_dir): + dt = simulation.timeStep * dtCoeff + self.num_advec_and_remesh( + self.velocity.gpu_data[self.dir].data, + self.part_advectedFields[0].gpu_data[0].data, + self.part_advectedFields[0].gpu_data[1].data, + self.part_advectedFields[0].gpu_data[2].data, + self.advectedFields[0].gpu_data[0].data, + self.advectedFields[0].gpu_data[1].data, + self.advectedFields[0].gpu_data[2].data, + self._num_locMem[0], self._num_locMem[1], + self._num_locMem[2], self._num_locMem[3], + self.gpu_precision(dt), + self.coord_min[self.dir], + self.mesh_size[self.dir]) def finalize(self): if self.num_advec_and_remesh.f_timer is not None: for f_timer in self.num_advec_and_remesh.f_timer: self.kernels_timer.addFunctionTimer(f_timer) GPUParticleAdvection.finalize(self) - - def __str__(self): - s = "GPUParticleAdvection1k (GPUParticleAdvection). 
" - s += GPUParticleAdvection.__str__(self) - return s - -if __name__ == "__main__": - print __doc__ - print "- Provided class : GPUParticleAdvection1k" - print GPUParticleAdvection1k.__doc__ diff --git a/HySoP/hysop/gpu/gpu_particle_advection_2k.py b/HySoP/hysop/gpu/gpu_particle_advection_2k.py index a54e98b42..95726f197 100644 --- a/HySoP/hysop/gpu/gpu_particle_advection_2k.py +++ b/HySoP/hysop/gpu/gpu_particle_advection_2k.py @@ -14,7 +14,6 @@ from parmepy.fields.continuous import Field from parmepy.gpu.gpu_discrete import GPUDiscreteField from parmepy.gpu import PARMES_REAL_GPU from parmepy.gpu.gpu_kernel import KernelLauncher -from parmepy.tools.timers import timed_function class GPUParticleAdvection2k(GPUParticleAdvection): @@ -58,6 +57,7 @@ class GPUParticleAdvection2k(GPUParticleAdvection): device_type, method, src, precision) + self.num_advec, self.num_remesh = None, None @debug def setUp(self): @@ -111,79 +111,58 @@ class GPUParticleAdvection2k(GPUParticleAdvection): """ Compile OpenCL sources for advection and remeshing kernel. """ - build_options = self.build_options - src, is_noBC, vec, f_space = \ - self.cl_env.kernels_config[self.dim][self.gpu_precision]['advec'] - gwi, lwi = f_space(self.resolution, vec) + build_options = self.build_options + self._size_constants + src, is_noBC, vec, f_space = self._kernel_cfg['advec'] + gwi, lwi = f_space(self.resol_dir, vec) WINb = lwi[0] if is_noBC: build_options += " -D WITH_NOBC=1" - build_options_d = " -D WI_NB=" + str(WINb) + build_options += " -D WI_NB=" + str(WINb) + build_options += self._constants[self.dir] ## Build code - src = [s.replace('RKN', self.method[0].lower()) for s in src] + src = [s.replace('RKN', self.method[TimeIntegrator].__name__.lower()) + for s in src] prg = self.cl_env.build_src( src, - build_options_d + self.cl_env.default_sizes_constants[self.dir], + build_options, vec, nb_remesh_components=self.part_advectedFields[0].nbComponents) self.num_advec = KernelLauncher( prg.advection_kernel, self.cl_env.queue, gwi, lwi) ## remeshing - build_options = self.build_options - src, is_noBC, vec, f_space = \ - self.cl_env.kernels_config[self.dim][self.gpu_precision]['remesh'] - gwi, lwi = f_space(self.resolution, vec) + build_options = self.build_options + self._size_constants + src, is_noBC, vec, f_space = self._kernel_cfg['remesh'] + gwi, lwi = f_space(self.resol_dir, vec) WINb = lwi[0] - build_options += " -D FORMULA=" + self.method[1].upper() + build_options += " -D FORMULA=" + self.method[Remesh].__name__.upper() if is_noBC: build_options += " -D WITH_NOBC=1" - build_options_d = " -D WI_NB=" + str(WINb) - build_options_d += " -D PART_NB_PER_WI=" - build_options_d += str(self.resolution[0] / WINb) - build_options_d += self.cl_env.default_sizes_constants[self.dir] + build_options += " -D WI_NB=" + str(WINb) + build_options += " -D PART_NB_PER_WI=" + build_options += str(self.resol_dir[0] / WINb) + build_options += self._constants[self.dir] ## Build code prg = self.cl_env.build_src( - src, build_options + build_options_d, vec, + src, build_options, vec, nb_remesh_components=self.part_advectedFields[0].nbComponents) self.num_remesh = KernelLauncher( prg.remeshing_kernel, self.cl_env.queue, gwi, lwi) ## Local memory array for kernels - self._num_locMem = [self.cl_env.LocalMemAllocator(self.resolution[0])] + self._num_locMem = [self.cl_env.LocalMemAllocator(self.resol_dir[0])] if self.part_advectedFields[0].nbComponents >= 2: self._num_locMem.append( self.cl_env.LocalMemAllocator( - self.resolution[0], reuse=False)) + 
self.resol_dir[0], reuse=False)) if self.part_advectedFields[0].nbComponents == 3: self._num_locMem.append( self.cl_env.LocalMemAllocator( - self.resolution[0], reuse=False)) + self.resol_dir[0], reuse=False)) - @debug - @timed_function - def apply(self, simulation, dtCoeff, split_id, old_dir): - """ - Apply advection operator. - - @param t : current time. - @param dt : time step. - @param d : Direction of splitting. - - Advection algorithm: - @li 1. Particle initialization : \n - - by copy scalar from grid to particles if previous splitting - direction equals current splitting direction.\n - - by transposition of scalar from grid to particle. - @li 2. Particle advection :\n - - compute particle position in splitting direction as a - scalar. Performs a RK2 resolution of dx_p/dt = a_p. - @li 3. Profile timings of OpenCL kernels. - """ - GPUParticleAdvection.apply(self, simulation, dtCoeff, - split_id, old_dir) + def _compute_advec(self, simulation, dtCoeff, split_id, old_dir): dt = simulation.timeStep * dtCoeff # Advection self.num_advec(self.velocity.gpu_data[self.dir].data, @@ -192,39 +171,45 @@ class GPUParticleAdvection2k(GPUParticleAdvection): self.gpu_precision(dt), self.coord_min[self.dir], self.mesh_size[self.dir]) + + def _compute_1c(self, simulation, dtCoeff, split_id, old_dir): + self._compute_advec(simulation, dtCoeff, split_id, old_dir) self.cl_env.queue.finish() - # Remeshing - if self.part_advectedFields[0].nbComponents == 3: - self.num_remesh(self.part_position.gpu_data[0].data, - self.part_advectedFields[0].gpu_data[0].data, - self.part_advectedFields[0].gpu_data[1].data, - self.part_advectedFields[0].gpu_data[2].data, - self.advectedFields[0].gpu_data[0].data, - self.advectedFields[0].gpu_data[1].data, - self.advectedFields[0].gpu_data[2].data, - self._num_locMem[0], - self._num_locMem[1], - self._num_locMem[2], - self.coord_min[self.dir], - self.mesh_size[self.dir]) - - elif self.part_advectedFields[0].nbComponents == 2: - self.num_remesh(self.part_position.gpu_data[0].data, - self.part_advectedFields[0].gpu_data[0].data, - self.part_advectedFields[0].gpu_data[1].data, - self.advectedFields[0].gpu_data[0].data, - self.advectedFields[0].gpu_data[1].data, - self._num_locMem[0], - self._num_locMem[1], - self.coord_min[self.dir], - self.mesh_size[self.dir]) - else: - self.num_remesh(self.part_position.gpu_data[0].data, - self.part_advectedFields[0].gpu_data[0].data, - self.advectedFields[0].gpu_data[0].data, - self._num_locMem[0], - self.coord_min[self.dir], - self.mesh_size[self.dir]) + self.num_remesh(self.part_position.gpu_data[0].data, + self.part_advectedFields[0].gpu_data[0].data, + self.advectedFields[0].gpu_data[0].data, + self._num_locMem[0], + self.coord_min[self.dir], + self.mesh_size[self.dir]) + + def _compute_2c(self, simulation, dtCoeff, split_id, old_dir): + self._compute_advec(simulation, dtCoeff, split_id, old_dir) + self.cl_env.queue.finish() + self.num_remesh(self.part_position.gpu_data[0].data, + self.part_advectedFields[0].gpu_data[0].data, + self.part_advectedFields[0].gpu_data[1].data, + self.advectedFields[0].gpu_data[0].data, + self.advectedFields[0].gpu_data[1].data, + self._num_locMem[0], + self._num_locMem[1], + self.coord_min[self.dir], + self.mesh_size[self.dir]) + + def _compute_3c(self, simulation, dtCoeff, split_id, old_dir): + self._compute_advec(simulation, dtCoeff, split_id, old_dir) + self.cl_env.queue.finish() + self.num_remesh(self.part_position.gpu_data[0].data, + self.part_advectedFields[0].gpu_data[0].data, + 
self.part_advectedFields[0].gpu_data[1].data, + self.part_advectedFields[0].gpu_data[2].data, + self.advectedFields[0].gpu_data[0].data, + self.advectedFields[0].gpu_data[1].data, + self.advectedFields[0].gpu_data[2].data, + self._num_locMem[0], + self._num_locMem[1], + self._num_locMem[2], + self.coord_min[self.dir], + self.mesh_size[self.dir]) def finalize(self): if self.num_advec.f_timer is not None: @@ -234,8 +219,3 @@ class GPUParticleAdvection2k(GPUParticleAdvection): self.kernels_timer.addFunctionTimer(f_timer) GPUParticleAdvection.finalize(self) - -if __name__ == "__main__": - print __doc__ - print "- Provided class : GPUParticleAdvection2k" - print GPUParticleAdvection2k.__doc__ diff --git a/HySoP/hysop/gpu/tests/test_advection_randomVelocity.py b/HySoP/hysop/gpu/tests/test_advection_randomVelocity.py index 6c146b8ed..028c04aff 100644 --- a/HySoP/hysop/gpu/tests/test_advection_randomVelocity.py +++ b/HySoP/hysop/gpu/tests/test_advection_randomVelocity.py @@ -805,7 +805,7 @@ def test_rectangular_domain2D(): Interpolation: Linear, Remesh: L2_1, Support: 'gpu_2k', - Splitting: 'x_only'}, + Splitting: 'o2'}, precision=PARMES_REAL_GPU, ) advec_py = Advection(velo, scal, @@ -815,7 +815,7 @@ def test_rectangular_domain2D(): Interpolation: Linear, Remesh: L2_1, Support: '', - Splitting: 'x_only'}, + Splitting: 'o2'}, ) advec.discretize() advec_py.discretize() diff --git a/HySoP/hysop/gpu/tests/test_copy.py b/HySoP/hysop/gpu/tests/test_copy.py index 518c56d10..84b609858 100644 --- a/HySoP/hysop/gpu/tests/test_copy.py +++ b/HySoP/hysop/gpu/tests/test_copy.py @@ -10,11 +10,11 @@ from parmepy.gpu.gpu_kernel import KernelLauncher def test_copy2D(): resolution = (256, 256) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU, resolution) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) vec = 2 src_copy = 'kernels/copy.cl' build_options = "" - build_options += cl_env.default_sizes_constants[0] + build_options += " -D NB_I=256 -D NB_II=256" build_options += " -D TILE_DIM_COPY=16" build_options += " -D BLOCK_ROWS_COPY=8" gwi = (int(resolution[0] / 2), @@ -48,11 +48,11 @@ def test_copy2D(): def test_copy2D_rect(): resolution = (256, 512) resolutionT = (512, 256) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU, resolution) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) vec = 2 src_copy = 'kernels/copy.cl' build_options = "" - build_options += cl_env.default_sizes_constants[0] + build_options += " -D NB_I=256 -D NB_II=512" build_options += " -D TILE_DIM_COPY=16" build_options += " -D BLOCK_ROWS_COPY=8" gwi = (int(resolution[0] / 2), @@ -62,7 +62,7 @@ def test_copy2D_rect(): copy_x = KernelLauncher(prg.copy, cl_env.queue, gwi, lwi) build_options = "" - build_options += cl_env.default_sizes_constants[1] + build_options += " -D NB_I=512 -D NB_II=256" build_options += " -D TILE_DIM_COPY=16" build_options += " -D BLOCK_ROWS_COPY=8" gwi = (int(resolution[1] / 2), @@ -109,11 +109,11 @@ def test_copy2D_rect(): def test_copy3D(): resolution = (64, 64, 64) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU, resolution) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) vec = 4 src_copy = 'kernels/copy.cl' build_options = "" - build_options += cl_env.default_sizes_constants[0] + build_options += " -D NB_I=64 -D NB_II=64 -D NB_III=64" build_options += " -D TILE_DIM_COPY=16" build_options += " -D BLOCK_ROWS_COPY=8" gwi = (int(resolution[0] / 4), @@ -151,12 +151,12 @@ def test_copy3D_rect(): resolution_x = (16, 32, 64) resolution_y = 
(32, 16, 64) resolution_z = (64, 16, 32) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU, resolution_x) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) vec = 4 src_copy = 'kernels/copy.cl' build_options = "" - build_options += cl_env.default_sizes_constants[0] + build_options += " -D NB_I=16 -D NB_II=32 -D NB_III=64" build_options += " -D TILE_DIM_COPY=16" build_options += " -D BLOCK_ROWS_COPY=8" gwi = (int(resolution_x[0] / 4), @@ -167,7 +167,7 @@ def test_copy3D_rect(): init_copy_x = KernelLauncher(prg.copy, cl_env.queue, gwi, lwi) build_options = "" - build_options += cl_env.default_sizes_constants[1] + build_options += " -D NB_I=32 -D NB_II=16 -D NB_III=64" build_options += " -D TILE_DIM_COPY=16" build_options += " -D BLOCK_ROWS_COPY=8" gwi = (int(resolution_x[1] / 4), @@ -178,7 +178,7 @@ def test_copy3D_rect(): init_copy_y = KernelLauncher(prg.copy, cl_env.queue, gwi, lwi) build_options = "" - build_options += cl_env.default_sizes_constants[2] + build_options += " -D NB_I=64 -D NB_II=16 -D NB_III=32" build_options += " -D TILE_DIM_COPY=16" build_options += " -D BLOCK_ROWS_COPY=8" gwi = (int(resolution_x[2] / 4), diff --git a/HySoP/hysop/gpu/tests/test_opencl_environment.py b/HySoP/hysop/gpu/tests/test_opencl_environment.py index 9cc5d356f..8cc1de4c5 100644 --- a/HySoP/hysop/gpu/tests/test_opencl_environment.py +++ b/HySoP/hysop/gpu/tests/test_opencl_environment.py @@ -8,11 +8,9 @@ def test_queue_unique_creation(): Testing that only one queue is created when multiples calls to get an environment. """ - cl_env = get_opencl_environment(0, 0, 'gpu', - PARMES_REAL_GPU, None) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) cl_env_id = id(cl_env) - cl_envb = get_opencl_environment(0, 0, 'gpu', - PARMES_REAL_GPU, None) + cl_envb = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) cl_envb_id = id(cl_envb) assert cl_env_id == cl_envb_id @@ -21,8 +19,7 @@ def test_parse_src_expand_floatN(): """ """ import StringIO - cl_env = get_opencl_environment(0, 0, 'gpu', - PARMES_REAL_GPU, None) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU,) str_as_src = """ vstore__N__((float__N__)(gscal_loc[noBC_id(i+__NN__,nb_part)], ), (i + gidY*WIDTH)/__N__, gscal); @@ -42,8 +39,7 @@ def test_parse_src_expand(): """ """ import StringIO - cl_env = get_opencl_environment(0, 0, 'gpu', - PARMES_REAL_GPU, None) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) str_as_src = """ gvelo_loc[noBC_id(i+__NN__,nb_part)] = v.s__NN__; """ @@ -64,8 +60,7 @@ def test_parse_expand_remeshed_component(): """ """ import StringIO - cl_env = get_opencl_environment(0, 0, 'gpu', - PARMES_REAL_GPU, None) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) str_as_src = """ __kernel void advection_and_remeshing(__global const float* gvelo, __RCOMP_P__global const float* pscal__ID__, diff --git a/HySoP/hysop/gpu/tests/test_transposition.py b/HySoP/hysop/gpu/tests/test_transposition.py index 8b5d8b7fa..91c755dda 100644 --- a/HySoP/hysop/gpu/tests/test_transposition.py +++ b/HySoP/hysop/gpu/tests/test_transposition.py @@ -10,11 +10,11 @@ from parmepy.gpu.gpu_kernel import KernelLauncher def test_transposition_xy2D(): resolution = (256, 256) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU, resolution) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) vec = 4 src_transpose_xy = 'kernels/transpose_xy.cl' build_options = "" - build_options += cl_env.default_sizes_constants[0] + build_options += " -D NB_I=256 -D NB_II=256" 
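# The GPU tests in this patch no longer read mesh sizes from the environment
# (the old cl_env.default_sizes_constants list was removed); each kernel build
# now inlines its own " -D NB_I=... -D NB_II=..." macros. A hypothetical helper
# that produces the same option string from a resolution tuple:
def size_build_options(resolution):
    names = ("NB_I", "NB_II", "NB_III")
    return "".join(" -D %s=%d" % (n, r) for n, r in zip(names, resolution))

# size_build_options((256, 256))   -> " -D NB_I=256 -D NB_II=256"
# size_build_options((16, 32, 64)) -> " -D NB_I=16 -D NB_II=32 -D NB_III=64"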
build_options += " -D PADDING_XY=1" build_options += " -D TILE_DIM_XY=32 -D BLOCK_ROWS_XY=8" gwi = (int(resolution[0] / 4), int(resolution[1]) / 4) @@ -63,13 +63,13 @@ def test_transposition_xy2D(): def test_transposition_xy2D_rect(): resolution = (512, 256) resolutionT = (256, 512) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU, resolution) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) vec = 4 src_transpose_xy = 'kernels/transpose_xy.cl' build_options = "" # Settings are taken from destination layout as current layout. # gwi is computed form input layout (appears as transposed layout) - build_options += cl_env.default_sizes_constants[1] + build_options += " -D NB_I=256 -D NB_II=512" build_options += " -D TILE_DIM_XY=32 -D BLOCK_ROWS_XY=8 -D PADDING_XY=1" gwi = (int(resolution[0] / 4), int(resolution[1]) / 4) @@ -80,7 +80,7 @@ def test_transposition_xy2D_rect(): gwi, lwi) build_options = "" - build_options += cl_env.default_sizes_constants[0] + build_options += " -D NB_I=512 -D NB_II=256" build_options += " -D TILE_DIM_XY=32 -D BLOCK_ROWS_XY=8 -D PADDING_XY=1" gwi = (int(resolution[1] / 4), int(resolution[0]) / 4) @@ -132,11 +132,11 @@ def test_transposition_xy2D_rect(): def test_transposition_xy3D(): resolution = (32, 32, 32) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU, resolution) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) vec = 2 src_transpose_xy = 'kernels/transpose_xy.cl' build_options = "" - build_options += cl_env.default_sizes_constants[0] + build_options += " -D NB_I=32 -D NB_II=32 -D NB_III=32" build_options += " -D TILE_DIM_XY=16 -D BLOCK_ROWS_XY=8 -D PADDING_XY=1" gwi = (int(resolution[0] / 2), int(resolution[1] / 2), @@ -184,13 +184,13 @@ def test_transposition_xy3D(): def test_transposition_xy3D_rect(): resolution = (32, 64, 32) resolutionT = (64, 32, 32) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU, resolution) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) vec = 2 src_transpose_xy = 'kernels/transpose_xy.cl' build_options = "" # Settings are taken from destination layout as current layout. 
# gwi is computed form input layout (appears as transposed layout) - build_options += cl_env.default_sizes_constants[1] + build_options += " -D NB_I=64 -D NB_II=32 -D NB_III=32" build_options += " -D TILE_DIM_XY=16 -D BLOCK_ROWS_XY=8 -D PADDING_XY=1" gwi = (int(resolution[0] / 2), int(resolution[1] / 2), @@ -201,7 +201,7 @@ def test_transposition_xy3D_rect(): prg.transpose_xy, cl_env.queue, gwi, lwi) build_options = "" - build_options += cl_env.default_sizes_constants[0] + build_options += " -D NB_I=32 -D NB_II=64 -D NB_III=32" build_options += " -D TILE_DIM_XY=16 -D BLOCK_ROWS_XY=8 -D PADDING_XY=1" gwi = (int(resolution[1] / 2), int(resolution[0] / 2), @@ -251,11 +251,11 @@ def test_transposition_xy3D_rect(): def test_transposition_xz3D(): resolution = (32, 32, 32) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU, resolution) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) vec = 1 src_transpose_xz = 'kernels/transpose_xz_noVec.cl' build_options = "" - build_options += cl_env.default_sizes_constants[0] + build_options += " -D NB_I=32 -D NB_II=32 -D NB_III=32" build_options += " -D TILE_DIM_XZ=16 -D BLOCK_ROWS_XZ=4 -D BLOCK_DEPH_XZ=4 -D PADDING_XZ=1" gwi = (int(resolution[0]), int(resolution[1] / 4), @@ -303,13 +303,13 @@ def test_transposition_xz3D(): def test_transposition_xz3D_rect(): resolution = (32, 32, 64) resolutionT = (64, 32, 32) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU, resolution) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) vec = 1 src_transpose_xz = 'kernels/transpose_xz_noVec.cl' build_options = "" # Settings are taken from destination layout as current layout. # gwi is computed form input layout (appears as transposed layout) - build_options += cl_env.default_sizes_constants[2] + build_options += " -D NB_I=64 -D NB_II=32 -D NB_III=32" build_options += " -D TILE_DIM_XZ=16 -D BLOCK_ROWS_XZ=4 -D BLOCK_DEPH_XZ=4 -D PADDING_XZ=1" gwi = (int(resolution[0]), int(resolution[1] / 4), @@ -320,7 +320,7 @@ def test_transposition_xz3D_rect(): prg.transpose_xz, cl_env.queue, gwi, lwi) build_options = "" - build_options += cl_env.default_sizes_constants[0] + build_options += " -D NB_I=32 -D NB_II=32 -D NB_III=64" build_options += " -D TILE_DIM_XZ=16 -D BLOCK_ROWS_XZ=4 -D BLOCK_DEPH_XZ=4 -D PADDING_XZ=1" gwi = (int(resolution[2]), int(resolution[1] / 4), @@ -368,11 +368,11 @@ def test_transposition_xz3D_rect(): def test_transposition_xz3Dslice(): resolution = (32, 32, 32) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU, resolution) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) vec = 1 src_transpose_xz = 'kernels/transpose_xz_slice_noVec.cl' build_options = "" - build_options += cl_env.default_sizes_constants[0] + build_options += " -D NB_I=32 -D NB_II=32 -D NB_III=32" build_options += " -D TILE_DIM_XZ=16 -D BLOCK_ROWS_XZ=1 -D BLOCK_DEPH_XZ=4 -D PADDING_XZ=1" gwi = (int(resolution[0]), int(resolution[1]), @@ -420,13 +420,13 @@ def test_transposition_xz3Dslice(): def test_transposition_xz3Dslice_rect(): resolution = (32, 32, 64) resolutionT = (64, 32, 32) - cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU, resolution) + cl_env = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU) vec = 1 src_transpose_xz = 'kernels/transpose_xz_slice_noVec.cl' build_options = "" # Settings are taken from destination layout as current layout. 
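
The XZ variants apply the same rule along the first and third axes: transposing (32, 32, 64) into (64, 32, 32) builds the forward kernel with -D NB_I=64 -D NB_II=32 -D NB_III=32 and the inverse with -D NB_I=32 -D NB_II=32 -D NB_III=64. On the host this corresponds to swapping the first and last axes; the snippet below is only a hedged illustration of such a reference result, since the actual assertions of these tests lie outside the hunks shown here.

    import numpy as np

    # Host-side reference for an XZ transposition of a (32, 32, 64) field.
    a = np.random.rand(32, 32, 64).astype(np.float32)
    a_xz = np.transpose(a, (2, 1, 0))
    assert a_xz.shape == (64, 32, 32)
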
# gwi is computed form input layout (appears as transposed layout) - build_options += cl_env.default_sizes_constants[2] + build_options += " -D NB_I=64 -D NB_II=32 -D NB_III=32" build_options += " -D TILE_DIM_XZ=16 -D BLOCK_ROWS_XZ=1 -D BLOCK_DEPH_XZ=4 -D PADDING_XZ=1" gwi = (int(resolution[0]), int(resolution[1]), @@ -437,7 +437,7 @@ def test_transposition_xz3Dslice_rect(): prg.transpose_xz, cl_env.queue, gwi, lwi) build_options = "" - build_options += cl_env.default_sizes_constants[0] + build_options += " -D NB_I=32 -D NB_II=32 -D NB_III=64" build_options += " -D TILE_DIM_XZ=16 -D BLOCK_ROWS_XZ=1 -D BLOCK_DEPH_XZ=4 -D PADDING_XZ=1" gwi = (int(resolution[2]), int(resolution[1]), diff --git a/HySoP/hysop/gpu/tools.py b/HySoP/hysop/gpu/tools.py index 2528e639e..58551d7c0 100644 --- a/HySoP/hysop/gpu/tools.py +++ b/HySoP/hysop/gpu/tools.py @@ -4,7 +4,6 @@ Tools for gpu management. """ from parmepy import __VERBOSE__ -from parmepy.constants import S_DIR from parmepy.gpu import cl, clTools, PARMES_REAL_GPU, PARMES_DOUBLE_GPU, \ GPU_SRC, CL_PROFILE import re @@ -18,7 +17,7 @@ class OpenCLEnvironment(object): """ def __init__(self, platform_id, device_id, device_type, - precision, resolution, gl_sharing=False): + precision, gl_sharing=False): """Create environment. @param platform_id : OpenCL platform id @param device_id : OpenCL device id @@ -46,25 +45,6 @@ class OpenCLEnvironment(object): self.precision = precision self.macros = {} self.default_build_opts = "-Werror" - if not self.precision is None: - self.default_build_opts += self._get_precision_opts() - if resolution is not None: - for i in xrange(len(resolution)): - self.default_build_opts += " -D NB" + S_DIR[i] - self.default_build_opts += "=" + str(resolution[i]) - if len(resolution) == 3: - self.default_sizes_constants = [ - " -D NB_I=NB_X -D NB_II=NB_Y -D NB_III=NB_Z", - " -D NB_I=NB_Y -D NB_II=NB_X -D NB_III=NB_Z", - " -D NB_I=NB_Z -D NB_II=NB_X -D NB_III=NB_Y", - ] - else: - self.default_sizes_constants = [ - " -D NB_I=NB_X -D NB_II=NB_Y", - " -D NB_I=NB_Y -D NB_II=NB_X", - ] - else: - self.default_sizes_constants = None ## Kernels configuration dictionary if self.device.name == "Cayman": @@ -76,9 +56,8 @@ class OpenCLEnvironment(object): self.kernels_config = kernel_cfg self._locMem_Buffers = {} - def modify(self, platform_id, device_id, device_type, - precision, resolution, gl_sharing=False): + precision, gl_sharing=False): """ Modify OpenCL environment parameters. @param platform_id : OpenCL platform id @@ -119,25 +98,6 @@ class OpenCLEnvironment(object): self.precision = precision self.default_build_opts = self._get_precision_opts() - if resolution is not None: - self.default_build_opts = "" - if not self.precision is None: - self.default_build_opts += self._get_precision_opts() - for i in xrange(len(resolution)): - self.default_build_opts += " -D NB" + S_DIR[i] - self.default_build_opts += "=" + str(resolution[i]) - if len(resolution) == 3: - self.default_sizes_constants = [ - " -D NB_I=NB_X -D NB_II=NB_Y -D NB_III=NB_Z", - " -D NB_I=NB_Y -D NB_II=NB_X -D NB_III=NB_Z", - " -D NB_I=NB_Z -D NB_II=NB_X -D NB_III=NB_Y", - ] - else: - self.default_sizes_constants = [ - " -D NB_I=NB_X -D NB_II=NB_Y", - " -D NB_I=NB_Y -D NB_II=NB_X", - ] - def _get_platform(self, platform_id): """ Get an OpenCL platform. 
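
With the resolution argument gone, the environment only carries -Werror plus the precision-related options, so one instance can serve kernels built for any grid size. get_opencl_environment still hands back a single module-level object, updated in place by modify() on later calls (this is what test_queue_unique_creation checks). A minimal usage sketch, assuming the function is importable from parmepy.gpu.tools, where it is defined:

    from parmepy.gpu import PARMES_REAL_GPU, PARMES_DOUBLE_GPU
    from parmepy.gpu.tools import get_opencl_environment

    cl_env_a = get_opencl_environment(0, 0, 'gpu', PARMES_REAL_GPU)
    cl_env_b = get_opencl_environment(0, 0, 'gpu', PARMES_DOUBLE_GPU)
    assert cl_env_a is cl_env_b   # one environment per process, modified in place

    # Grid sizes are now passed per kernel build instead of being stored here:
    build_options = " -D NB_I=32 -D NB_II=32 -D NB_III=32"
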
@@ -364,16 +324,16 @@ class OpenCLEnvironment(object): f = open(GPU_SRC + sf, 'r') else: raise ioe - gpu_src += "".join(self.parse_file( - f, vector_width, nb_remesh_components)) + gpu_src += "".join( + self.parse_file(f, vector_width, nb_remesh_components)) f.close() #print gpu_src for k in self.macros: gpu_src = gpu_src.replace(k, str(self.macros[k])) if self.precision is PARMES_REAL_GPU: - float_replace = re.compile('(?P<float>\d\.\d+)') + float_replace = re.compile(r'(?P<float>\d\.\d+)') prg = cl.Program(self.ctx, float_replace.sub( - '\g<float>f', gpu_src)) + r'\g<float>f', gpu_src)) else: prg = cl.Program(self.ctx, gpu_src.replace('float', 'double')) # OpenCL program @@ -454,7 +414,7 @@ class OpenCLEnvironment(object): """ src = "" # replacement for floatN elements - vec_floatn = re.compile('\(float__N__\)\(') + vec_floatn = re.compile(r'\(float__N__\)\(') vec_nn = re.compile('__NN__') vec_n = re.compile('__N__') for l in f.readlines(): @@ -479,23 +439,26 @@ class OpenCLEnvironment(object): # Replacement for remeshed components re_instr = re.compile(r'__RCOMP_I([\w\s\.,()\[\]+*/=-]+;)') # __RCOMP_I ...; + def repl_instruction(m): return ''.join( [m.group(1).replace('__ID__', str(i)) for i in xrange(nb_remesh_components)]) # __RCOMP_P ..., ou __RCOMP_P ...) re_param = re.compile(r'__RCOMP_P([\w\s\.\[\]+*/=-]+(?=,|\)))') + def repl_parameter(m): return ', '.join( [m.group(1).replace('__ID__', str(i)) for i in xrange(nb_remesh_components)]) - src = re_instr.sub(repl_instruction ,src) - src = re_param.sub(repl_parameter ,src) + src = re_instr.sub(repl_instruction, src) + src = re_param.sub(repl_parameter, src) return src def LocalMemAllocator(self, nbElements, reuse=True): - """Allocates a space on device local memory of size nbElements*precision + """Allocates a space on device local memory of size + nbElements*precision @param nbElements : Numer of element of the space to allocate. @param reuse : If true, try to reuse same allocated space, create new @@ -520,7 +483,7 @@ class OpenCLEnvironment(object): def get_opengl_shared_environment(platform_id, device_id, device_type, - resolution, precision): + precision): """ Get an OpenCL environment with OpenGL shared enable. @@ -535,15 +498,14 @@ def get_opengl_shared_environment(platform_id, device_id, device_type, global __cl_env if __cl_env is None: __cl_env = OpenCLEnvironment(platform_id, device_id, device_type, - precision, resolution, gl_sharing=True) + precision, gl_sharing=True) else: __cl_env.modify(platform_id, device_id, device_type, - precision, resolution, gl_sharing=True) + precision, gl_sharing=True) return __cl_env -def get_opencl_environment(platform_id, device_id, device_type, precision, - resolution): +def get_opencl_environment(platform_id, device_id, device_type, precision): """ Get an OpenCL environment. 
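
The raw-string changes above only make the regular expressions explicit; the preprocessing itself is unchanged: in single precision every float literal gets an f suffix, in double precision float is replaced by double, and the __RCOMP_P/__RCOMP_I patterns duplicate the captured parameter or instruction once per remeshed component with __ID__ replaced by the component index. A standalone illustration of the literal rewriting, using the same expression as the code:

    import re

    float_replace = re.compile(r'(?P<float>\d\.\d+)')
    src = "scalar[i] = 0.5 * (a + 1.0);"
    print(float_replace.sub(r'\g<float>f', src))
    # prints: scalar[i] = 0.5f * (a + 1.0f);
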
@@ -557,10 +519,10 @@ def get_opencl_environment(platform_id, device_id, device_type, precision, global __cl_env if __cl_env is None: __cl_env = OpenCLEnvironment(platform_id, device_id, device_type, - precision, resolution) + precision) else: __cl_env.modify(platform_id, device_id, device_type, - precision, resolution) + precision) return __cl_env diff --git a/HySoP/hysop/operator/advection.py b/HySoP/hysop/operator/advection.py index 9c1f85f85..522c1f006 100644 --- a/HySoP/hysop/operator/advection.py +++ b/HySoP/hysop/operator/advection.py @@ -210,31 +210,6 @@ class Advection(Operator): # --- GPU or pure-python advection --- else: - # Splitting configuration - if self.method[Splitting] == 'o2_FullHalf': - ## Half timestep in all directions - [self.splitting.append((i, 0.5)) for i in xrange(self._dim)] - [self.splitting.append((self._dim - 1 - i, 0.5)) - for i in xrange(self._dim)] - elif self.method[Splitting] == 'o1': - [self.splitting.append((i, 1.)) for i in xrange(self._dim)] - elif self.method[Splitting] == 'x_only': - self.splitting.append((0, 1.)) - elif self.method[Splitting] == 'y_only': - self.splitting.append((1, 1.)) - elif self.method[Splitting] == 'z_only': - self.splitting.append((2, 1.)) - elif self.method[Splitting] == 'o2': - ## Half timestep in all directions but last - [self.splitting.append((i, 0.5)) - for i in xrange(self._dim - 1)] - self.splitting.append((self._dim - 1, 1.)) - [self.splitting.append((self._dim - 2 - i, 0.5)) - for i in xrange(self._dim - 1)] - else: - raise ValueError('Unknown splitting configuration:' + - self.method[Splitting]) - particles_advectedFields = [ Field(adF.domain, "Particle_AdvectedFields", isVector=adF.isVector) for adF in self.advectedFields] @@ -278,43 +253,85 @@ class Advection(Operator): self.advecDir[i].setWorks(rwork, iwork) def setUp(self): - if self._isSCALES: - advectedDiscreteFields = [self.discreteFields[f] - for f in self.advectedFields] - # - Create the discreteOperator from the list of discrete fields - - from parmepy.operator.discrete.scales_advection import \ - ScalesAdvection - self.discreteOperator = ScalesAdvection( - self.discreteFields[self.velocity], - advectedDiscreteFields, method=self.method, - **self.config) + if not self._isUpToDate: + if self._isSCALES: + advectedDiscreteFields = [self.discreteFields[f] + for f in self.advectedFields] + # - Create the discreteOperator from the list of discrete fields - + from parmepy.operator.discrete.scales_advection import \ + ScalesAdvection + self.discreteOperator = ScalesAdvection( + self.discreteFields[self.velocity], + advectedDiscreteFields, method=self.method, + **self.config) - # -- Final set up -- - self.discreteOperator.setUp() - else: - for i in xrange(self._dim): - self.advecDir[i].setUp() - # If topologies differs between directions, one need Redistribute - # operators - if main_size > 1: - # Build bridges - self.bridges = self._dim * [None] + # -- Final set up -- + self.discreteOperator.setUp() + self.apply = self.discreteOperator.apply + else: + for i in xrange(self._dim): + self.advecDir[i].setUp() + # If topologies differs between directions, one need Redistribute + # operators if main_size > 1: - for dfrom in xrange(self.domain.dimension): - self.bridges[dfrom] = self._dim * [None] - for dto in xrange(self._dim): - if dfrom == dto: - self.bridges[dfrom][dto] = None - else: - self.bridges[dfrom][dto] = Redistribute( - self.input, - self.advecDir[dfrom], - self.advecDir[dto]) - self.bridges[dfrom][dto].setUp() - self._isUpToDate = True + # Build bridges 
+ self.bridges = self._dim * [None] + if main_size > 1: + for dfrom in xrange(self.domain.dimension): + self.bridges[dfrom] = self._dim * [None] + for dto in xrange(self._dim): + if dfrom == dto: + self.bridges[dfrom][dto] = None + else: + self.bridges[dfrom][dto] = Redistribute( + self.input, + self.advecDir[dfrom], + self.advecDir[dto]) + self.bridges[dfrom][dto].setUp() + + # Splitting configuration + if self.method[Splitting] == 'o2_FullHalf': + ## Half timestep in all directions + [self.splitting.append((i, 0.5)) for i in xrange(self._dim)] + [self.splitting.append((self._dim - 1 - i, 0.5)) + for i in xrange(self._dim)] + elif self.method[Splitting] == 'o1': + [self.splitting.append((i, 1.)) for i in xrange(self._dim)] + elif self.method[Splitting] == 'x_only': + self.splitting.append((0, 1.)) + elif self.method[Splitting] == 'y_only': + self.splitting.append((1, 1.)) + elif self.method[Splitting] == 'z_only': + self.splitting.append((2, 1.)) + elif self.method[Splitting] == 'o2': + ## Half timestep in all directions but last + [self.splitting.append((i, 0.5)) + for i in xrange(self._dim - 1)] + self.splitting.append((self._dim - 1, 1.)) + [self.splitting.append((self._dim - 2 - i, 0.5)) + for i in xrange(self._dim - 1)] + else: + raise ValueError('Unknown splitting configuration:' + + self.method[Splitting]) + + if self.method[Support].find('gpu') >= 0: + splitting_nbSteps = len(self.splitting) + for d in xrange(self.domain.dimension): + dOp = self.advecDir[d].discreteOperator + assert len(dOp.exec_list) == splitting_nbSteps, \ + "Discrete operator execution " + \ + "list and splitting steps sizes must be equal " + \ + str(len(dOp.exec_list)) + " != " + \ + str(splitting_nbSteps) + + if main_size > 1: + self.apply = self._apply_Comm + else: + self.apply = self._apply_noComm + self._isUpToDate = True @debug - def apply(self, simulation=None): + def _apply_noComm(self, simulation=None): """ Apply this operator to its variables. @param simulation : object that describes the simulation @@ -323,18 +340,29 @@ class Advection(Operator): Redefinition for advection. Applying a dimensional splitting. """ - if self._isSCALES: - self.discreteOperator.apply(simulation) - else: - for split_id, split in enumerate(self.splitting): - if main_size > 1: - # Calling the redistribute operators between directions - if not self._old_dir == split[0]: - self.bridges[self._old_dir][split[0]].apply() - self.bridges[self._old_dir][split[0]].wait() - self.advecDir[split[0]].apply( - simulation, split[1], split_id, self._old_dir) - self._old_dir = split[0] + for split_id, split in enumerate(self.splitting): + self.advecDir[split[0]].apply( + simulation, split[1], split_id, self._old_dir) + self._old_dir = split[0] + + @debug + def _apply_Comm(self, simulation=None): + """ + Apply this operator to its variables. + @param simulation : object that describes the simulation + parameters (time, time step, iteration number ...), see + parmepy.problem.simulation.Simulation for details. + + Redefinition for advection. Applying a dimensional splitting. 
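
Gathering the splitting setup here also makes the produced sequences easy to read off. The sketch below replays the construction above for a 3D domain; it is illustrative only, the operator itself appends to self.splitting.

    def build_splitting(config, dim=3):
        steps = []
        if config == 'o2_FullHalf':
            # half timestep in all directions, forward then backward
            steps += [(i, 0.5) for i in range(dim)]
            steps += [(dim - 1 - i, 0.5) for i in range(dim)]
        elif config == 'o1':
            steps += [(i, 1.) for i in range(dim)]
        elif config in ('x_only', 'y_only', 'z_only'):
            steps.append(('xyz'.index(config[0]), 1.))
        elif config == 'o2':
            # half timestep in all directions but the last
            steps += [(i, 0.5) for i in range(dim - 1)]
            steps.append((dim - 1, 1.))
            steps += [(dim - 2 - i, 0.5) for i in range(dim - 1)]
        else:
            raise ValueError('Unknown splitting configuration:' + config)
        return steps

    # build_splitting('o1')          -> [(0, 1.0), (1, 1.0), (2, 1.0)]
    # build_splitting('o2')          -> [(0, 0.5), (1, 0.5), (2, 1.0),
    #                                    (1, 0.5), (0, 0.5)]
    # build_splitting('o2_FullHalf') -> [(0, 0.5), (1, 0.5), (2, 0.5),
    #                                    (2, 0.5), (1, 0.5), (0, 0.5)]

On a GPU support, the number of splitting steps must match the length of the discrete operator's exec_list, which is what the new assertion enforces; apply is then bound to _apply_Comm or _apply_noComm depending on main_size.
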
+ """ + for split_id, split in enumerate(self.splitting): + # Calling the redistribute operators between directions + if not self._old_dir == split[0]: + self.bridges[self._old_dir][split[0]].apply() + self.bridges[self._old_dir][split[0]].wait() + self.advecDir[split[0]].apply( + simulation, split[1], split_id, self._old_dir) + self._old_dir = split[0] @debug def finalize(self): @@ -345,4 +373,3 @@ class Advection(Operator): for dop in self.advecDir: dop.finalize() self.timer = self.timer + dop.timer - print self.timer diff --git a/HySoP/hysop/operator/advection_dir.py b/HySoP/hysop/operator/advection_dir.py index dbdd608ed..e4d30f603 100644 --- a/HySoP/hysop/operator/advection_dir.py +++ b/HySoP/hysop/operator/advection_dir.py @@ -3,7 +3,7 @@ Advection of a field in a single direction. """ -from parmepy.constants import debug, np +from parmepy.constants import debug, np, S_DIR from parmepy.methods_keys import TimeIntegrator, Interpolation, Remesh, \ Support, Splitting from parmepy.numerics.integrators.runge_kutta2 import RK2 @@ -156,7 +156,6 @@ class AdvectionDir(Operator): """ Memory cleaning. """ - self.timer = Timer(self) + self.timer = Timer(self, suffix=S_DIR[self.dir]) self.discreteOperator.finalize() self.timer = self.timer + self.discreteOperator.timer - print self.timer diff --git a/HySoP/hysop/operator/continuous.py b/HySoP/hysop/operator/continuous.py index f4aea9a8d..0de74f2bb 100644 --- a/HySoP/hysop/operator/continuous.py +++ b/HySoP/hysop/operator/continuous.py @@ -7,6 +7,7 @@ from abc import ABCMeta, abstractmethod from parmepy.constants import debug from parmepy.mpi import main_rank import numpy as np +from parmepy.tools.timers import Timer class Operator(object): @@ -88,6 +89,7 @@ class Operator(object): input for ghosts must not be different' else: self.ghosts = topo.ghosts + self.timer = Timer(self) @staticmethod def getWorkLengths(): @@ -124,7 +126,7 @@ class Operator(object): Memory cleaning. """ self.discreteOperator.finalize() - self.timer = self.discreteOperator.timer + self.timer = self.timer + self.discreteOperator.timer @debug def apply(self, simulation=None): diff --git a/HySoP/hysop/operator/discrete/discrete.py b/HySoP/hysop/operator/discrete/discrete.py index 13aa3f9ac..8eacb1074 100644 --- a/HySoP/hysop/operator/discrete/discrete.py +++ b/HySoP/hysop/operator/discrete/discrete.py @@ -103,7 +103,7 @@ class DiscreteOperator(object): """ Cleaning, if required. 
""" - print self.timer + pass def __str__(self): """Common printings for discrete operators.""" diff --git a/HySoP/hysop/operator/monitors/monitoring.py b/HySoP/hysop/operator/monitors/monitoring.py index 4773c60c7..abe32e291 100644 --- a/HySoP/hysop/operator/monitors/monitoring.py +++ b/HySoP/hysop/operator/monitors/monitoring.py @@ -30,7 +30,7 @@ class Monitoring(Operator): pass def finalize(self): - print self.timer + pass def discretize(self): pass diff --git a/HySoP/hysop/operator/tests/test_advec_scales.py b/HySoP/hysop/operator/tests/test_advec_scales.py index dfd9b92b7..954c78bd5 100755 --- a/HySoP/hysop/operator/tests/test_advec_scales.py +++ b/HySoP/hysop/operator/tests/test_advec_scales.py @@ -31,6 +31,7 @@ def test_nullVelocity_m4(): method={TimeIntegrator: RK2, Interpolation: Linear, Remesh: L2_1, + Splitting: 'o2_FullHalf', Support: ''} ) advec.discretize() @@ -70,6 +71,7 @@ def test_nullVelocity_vec_m4(): method={TimeIntegrator: RK2, Interpolation: Linear, Remesh: L2_1, + Splitting: 'o2_FullHalf', Support: ''} ) advec.discretize() @@ -178,6 +180,7 @@ def test_nullVelocity_m8(): method={TimeIntegrator: RK2, Interpolation: Linear, Remesh: M8Prime, + Splitting: 'o2_FullHalf', Support: ''} ) advec.discretize() @@ -217,6 +220,7 @@ def test_nullVelocity_vec_m8(): method={TimeIntegrator: RK2, Interpolation: Linear, Remesh: M8Prime, + Splitting: 'o2_FullHalf', Support: ''} ) advec.discretize() @@ -265,6 +269,7 @@ def _randomVelocity_m4(): method={TimeIntegrator: RK2, Interpolation: Linear, Remesh: L2_1, + Splitting: 'o2_FullHalf', Support: ''} ) advec.discretize() @@ -313,6 +318,7 @@ def _randomVelocity_vec_m4(): method={TimeIntegrator: RK2, Interpolation: Linear, Remesh: L2_1, + Splitting: 'o2_FullHalf', Support: ''} ) advec.discretize() @@ -373,6 +379,7 @@ def test_randomVelocity_m6(): method={TimeIntegrator: RK2, Interpolation: Linear, Remesh: L4_2, + Splitting: 'o2_FullHalf', Support: ''} ) advec.discretize() @@ -421,6 +428,7 @@ def test_randomVelocity_vec_m6(): method={TimeIntegrator: RK2, Interpolation: Linear, Remesh: L4_2, + Splitting: 'o2_FullHalf', Support: ''} ) advec.discretize() @@ -479,6 +487,7 @@ def test_randomVelocity_m8(): method={TimeIntegrator: RK2, Interpolation: Linear, Remesh: M8Prime, + Splitting: 'o2_FullHalf', Support: ''} ) advec.discretize() diff --git a/HySoP/hysop/problem/problem.py b/HySoP/hysop/problem/problem.py index f7108bd89..a8061a03e 100644 --- a/HySoP/hysop/problem/problem.py +++ b/HySoP/hysop/problem/problem.py @@ -216,10 +216,22 @@ class Problem(object): for op in self.operators: if isinstance(op, Monitoring): op.finalize() + self.timer = self.timer + op.timer for op in self.operators: if not isinstance(op, Monitoring): op.finalize() - print self.timer + self.timer = self.timer + op.timer + var = [] + for op in self.operators: + for v in op.variables: + if not v in var: + var.append(v) + for v in var: + v.finalize() + try: + self.timer = self.timer + v.timer + except AttributeError: + pass if main_rank == 0: print "===\n" @@ -230,14 +242,6 @@ class Problem(object): s += "with following operators : \n" for op in self.operators: s += str(op) - s += "working on fields : \n" - for f in self.variables: - s += str(f) - s += "defined on domains : \n" - for d in self.domains: - s += str(d) - s += "\nsolved by : \n" + str(self.solver) - return s def dump(self, filename=None): diff --git a/HySoP/hysop/tools/timers.py b/HySoP/hysop/tools/timers.py index fb442ced1..84316ffcf 100644 --- a/HySoP/hysop/tools/timers.py +++ b/HySoP/hysop/tools/timers.py @@ 
-71,8 +71,8 @@ class FunctionTimer(object): return f def __str__(self): - s = "[" + str(main_rank) + "] |-- " + self.func_name - s += ' = ' + str(self.t) + " s ({0} calls)".format(self.ncalls) + s = self.func_name + s += ' : ' + str(self.t) + " s ({0} calls)".format(self.ncalls) return s @@ -103,33 +103,29 @@ class Timer(object): member of the given operator. """ - def __init__(self, op): + def __init__(self, op, suffix=''): """ Creates an Timer. @param op : Timer owner. """ ## Parent object name self._obj = op + self.name = self._obj.__class__.__name__ + suffix ## FunctionTimer dictionary with Functions names as keys. self.f_timers = {} ## Total time spent in the operator (in seconds). self.t = 0. + self._isEmpty = True def __add__(self, t): """ Overrides operator +. @param t : Other Timer object """ - t._compute_summary() - self.t += t.t - for t_k in t.f_timers.keys(): - if isinstance(t.f_timers[t_k], Timer): - self.f_timers[t_k] = t.f_timers[t_k].f_timers - else: - obj_name = t._obj.__class__.__name__ - t.f_timers[t_k].func_name = \ - obj_name + "_" + t.f_timers[t_k].func_name - self.f_timers[obj_name + "_" + t_k] = t.f_timers[t_k] + t.compute_summary() + #self.t += t.t + self._isEmpty = False + self.f_timers[t.name] = t return self def getFunctionTimer(self, func): @@ -154,50 +150,43 @@ class Timer(object): """ self.f_timers[ft.func_name] = ft - def addSubTimer(self, t, prefix): + def addSubTimer(self, t, prefix=''): if len(t.f_timers.keys()) > 0: - self.f_timers[t._obj.__class__.__name__ + prefix] = t + t.compute_summary() + self.f_timers[t.name + prefix] = t - def _compute_summary(self): + def compute_summary(self): """ Compute a summary of the different functions referenced herein. """ self.t = 0. for f in self.f_timers.keys(): - try: + # if self.f_timers[f] is a Timer, these times are already + # in the current Timer sum. + if not isinstance(self.f_timers[f], Timer): self.t += self.f_timers[f].t - except AttributeError: - # self.f_timers[f] is a sub-Timer times are details of - # functions times. these times are already in sum. - pass + self._isEmpty = False - def __str__(self): - self._compute_summary() - s = "[" + str(main_rank) + "] Time spent in " - s += self._obj.__class__.__name__ + ' = ' + str(self.t) + ' s' + def rTimes(self): + s = "" for f in sorted(self.f_timers.keys()): - if isinstance(self.f_timers[f], dict): - s += "\n[" + str(main_rank) + "] |-- " + f - for sf in sorted(self.f_timers[f].keys()): - s += "\n" + str( - self.f_timers[f][sf]).replace('|--', ' |--') + nl = "@@F@@[" + str(main_rank) + "]@@S@@" + if isinstance(self.f_timers[f], Timer): + s += nl + ' |-- ' + f + if self.f_timers[f].t > 0.: + s += " : " + str(self.f_timers[f].t) + subTimer = self.f_timers[f] + s += subTimer.rTimes().replace('@@S@@', '@@S@@ ') else: - s += "\n" + str(self.f_timers[f]) + s += nl + ' |-- ' + str(self.f_timers[f]) return s - def toString(self): - """ - Returns a string containing timings informations - @return timings as string. 
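
The reworked Timer builds a tree: adding one timer to another registers it as a named sub-timer (the name being the owner class name plus an optional suffix, such as the direction suffix added in advection_dir.py), and compute_summary() only accumulates plain function timers so that nested timers are not counted twice. The @@F@@ and @@S@@ markers emitted by rTimes() are placeholders for a newline and the indentation prefix, substituted in __str__ with extra spaces added per nesting level. A minimal, illustrative composition; the owner classes below are dummies used only to give the timers their names, and the import path is the one used in operator/continuous.py:

    from parmepy.tools.timers import Timer

    class Advection(object): pass
    class AdvectionDir(object): pass

    t_dir = Timer(AdvectionDir(), suffix='_X')   # reported as "AdvectionDir_X"
    t_op = Timer(Advection())
    t_op = t_op + t_dir      # t_dir becomes a sub-timer entry of t_op
    print(t_op)              # roughly: "[0] Time spent in Advection = 0.0 s"
                             #          "[0] |-- AdvectionDir_X"
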
- """ - h = self._obj.__class__.__name__.replace(' ', '_') + ' ' - t = str(self.t) + ' ' - for f in sorted(self.f_timers.keys()): - if isinstance(self.f_timers[f], dict): - for sf in sorted(self.f_timers[f].keys()): - h += (f + sf).replace(' ', '_') + ' ' - t += str(self.f_timers[f][sf].t) + ' ' - else: - h += f.replace(' ', '_') + ' ' - t += str(self.f_timers[f].t) + ' ' - return h, t + def __str__(self): + self.compute_summary() + if not self._isEmpty: + s = "[" + str(main_rank) + "] Time spent in " + s += self.name + ' = ' + str(self.t) + ' s' + s += self.rTimes().replace('@@F@@', '\n').replace('@@S@@', '') + else: + s = "" + return s -- GitLab