From c63ccf19425fd0aa2eb2189bfca27008eb621629 Mon Sep 17 00:00:00 2001 From: Jean-Matthieu Etancelin <jean-matthieu.etancelin@imag.fr> Date: Thu, 26 Apr 2012 15:23:29 +0000 Subject: [PATCH] =?UTF-8?q?Am=C3=A9liorations=20code=20gpu=20(r=C3=A9organ?= =?UTF-8?q?isation=20des=20buffers=20en=20m=C3=A9moire=20+=20initialisatio?= =?UTF-8?q?n=20des=20variables=20sur=20gpu)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- parmepy/examples/mainJM.py | 23 +- parmepy/examples/mainJM_kernels.cl | 52 +++++ parmepy/examples/main_Rotating_2D.py | 4 +- parmepy/examples/main_Rotating_2D_GH.py | 79 +++++++ .../examples/main_Rotating_2D_GH_kernels.cl | 51 +++++ parmepy/examples/print_volume.gp | 19 ++ parmepy/new_ParMePy/Operator/AdvectionDOp.py | 53 +++-- .../new_ParMePy/Operator/DiscreteOperator.py | 3 + parmepy/new_ParMePy/Operator/RemeshingDOp.py | 10 +- parmepy/new_ParMePy/Operator/VelocityDOp.py | 83 +++++++ parmepy/new_ParMePy/Operator/VelocityOp.py | 50 +++++ parmepy/new_ParMePy/Operator/VolumeDOp.py | 102 +++++++++ parmepy/new_ParMePy/Operator/__init__.py | 1 + parmepy/new_ParMePy/Param.py | 1 + .../new_ParMePy/Problem/ContinuousProblem.py | 25 ++- .../Problem/ContinuousTransportProblem.py | 4 +- .../new_ParMePy/Problem/DiscreteProblem.py | 26 ++- .../Problem/DiscreteTransportProblem.py | 7 +- .../new_ParMePy/Utils/GPUParticularSolver.py | 200 ++++++++++++----- .../Utils/GPUParticularSolver_GLRender.py | 203 ++++++++---------- parmepy/new_ParMePy/Utils/ParticularSolver.py | 11 +- parmepy/new_ParMePy/Utils/Printer.py | 4 +- parmepy/new_ParMePy/Utils/gpu_src.cl | 187 ++++++++++++---- .../Variable/AnalyticalVariable.py | 5 +- .../Variable/ContinuousVariable.py | 1 + .../new_ParMePy/Variable/DiscreteVariable.py | 38 ++-- 26 files changed, 958 insertions(+), 284 deletions(-) create mode 100644 parmepy/examples/mainJM_kernels.cl create mode 100644 parmepy/examples/main_Rotating_2D_GH.py create mode 100644 parmepy/examples/main_Rotating_2D_GH_kernels.cl create mode 100644 parmepy/examples/print_volume.gp create mode 100644 parmepy/new_ParMePy/Operator/VelocityDOp.py create mode 100644 parmepy/new_ParMePy/Operator/VelocityOp.py create mode 100644 parmepy/new_ParMePy/Operator/VolumeDOp.py diff --git a/parmepy/examples/mainJM.py b/parmepy/examples/mainJM.py index 7fed728a4..3e262b37b 100644 --- a/parmepy/examples/mainJM.py +++ b/parmepy/examples/mainJM.py @@ -18,16 +18,10 @@ def run(): # Parameter dim = 3 nb = 128 - boxLength = [1., 1., 1.] + boxlength = [1., 1., 1.] boxMin = [0., 0., 0.] nbElem = [nb, nb, nb] - # dim = 2 - # nb = 64 - # boxLength = [1., 1.] - # boxMin = [0., 0.] - # nbElem = [nb, nb] - timeStep = 0.02 FinalTime = 1. outputFilePrefix = './res/RK2_' @@ -36,14 +30,14 @@ def run(): t0 = time.time() box = pp.Box(dimension=dim, - length=boxLength, + length=boxlength, minPosition=boxMin) velo = pp.AnalyticalVariable(domain=box, dimension=dim, - formula=vitesse) + formula=vitesse, name='Velocity') scal = pp.AnalyticalVariable(domain=box, dimension=1, - formula=scalaire, scalar=True) + formula=scalaire, scalar=True, name='Scalar') advec = pp.Advection(velo, scal) cTspPb = pp.ContinuousTransportProblem(advection=advec) @@ -53,10 +47,13 @@ def run(): RemeshingMethod = pp.M6Prime(grid=box.discreteDomain, gvalues=scal.discreteVariable) ODESolver = pp.RK2(InterpolationMethod, box.discreteDomain.applyConditions) ## cpu - #solver = pp.ParticularSolver(ODESolver, InterpolationMethod, RemeshingMethod) + #solver = pp.ParticularSolver(cTspPb, ODESolver, InterpolationMethod, RemeshingMethod) ## gpu - #solver = pp.GPUParticularSolver(cTspPb, ODESolver, InterpolationMethod, RemeshingMethod, device_type='gpu') - solver = pp.GPUParticularSolver_GLRender(cTspPb, ODESolver, InterpolationMethod, RemeshingMethod, device_type='gpu', dt=timeStep) + solver = pp.GPUParticularSolver(cTspPb, + ODESolver, InterpolationMethod, RemeshingMethod, + device_type='gpu', src='./examples/mainJM_kernels.cl' + ) + #solver = pp.GPUParticularSolver_GLRender(cTspPb, ODESolver, InterpolationMethod, RemeshingMethod, device_type='gpu', dt=timeStep) cTspPb.setSolver(solver) cTspPb.setPrinter(pp.Printer(frequency=outputModulo, outputPrefix=outputFilePrefix, problem=cTspPb)) diff --git a/parmepy/examples/mainJM_kernels.cl b/parmepy/examples/mainJM_kernels.cl new file mode 100644 index 000000000..007e7c297 --- /dev/null +++ b/parmepy/examples/mainJM_kernels.cl @@ -0,0 +1,52 @@ + +__kernel void initScalar(__global float* values, + __private const float t, + __private const float4 min, + __private const uint4 nb, + __private const float4 size) +{ + __private uint ind,ix,iy,iz; + __private float px,py,pz,s; + + ix = get_global_id(0); + iy = get_global_id(1); + iz = get_global_id(2); + + px = min.x + (float)(ix)*size.x; + py = min.y + (float)(iy)*size.y; + pz = min.z + (float)(iz)*size.z; + + if ((px < 0.5f) && (py < 0.5f) && (pz < 0.5f)) + s = 1.0f; + else + s = 0.0f; + // Write + ind = iy + ix*nb.y; + values[ind] = s; +} + +// velocity field +__kernel void initVelocity(__global float* values, + __private const float t, + __private const float4 min, + __private const uint4 nb, + __private const float4 size + ) +{ + __private uint ix,iy,iz, ind; + + ix = get_global_id(0); + iy = get_global_id(1); + iz = get_global_id(2); + + // Write x component + ind = ix*nb.y*nb.z + iy*nb.z + iz; + values[ind] = 1.0f; + // Write y component + ind = (nb.x*nb.y*nb.z) + iy*nb.x*nb.z + ix*nb.z + iz; + values[ind] = 1.0f; + // Write z component + ind = (nb.x*nb.y*nb.z)*2 + iz*nb.x*nb.y + ix*nb.y + iy; + values[ind] = 1.0f; + +} diff --git a/parmepy/examples/main_Rotating_2D.py b/parmepy/examples/main_Rotating_2D.py index d4d637a81..20b9b6e30 100644 --- a/parmepy/examples/main_Rotating_2D.py +++ b/parmepy/examples/main_Rotating_2D.py @@ -18,12 +18,12 @@ def scalaire(x): def run(): dim = 2 - nb = 128 + nb = 1024 boxLength = [2., 2.] boxMin = [-1., -1.] nbElem = [nb, nb] - timeStep = 0.02 + timeStep = 0.07 FinalTime = 1. outputFilePrefix = './res/RK2_' outputModulo = 1 diff --git a/parmepy/examples/main_Rotating_2D_GH.py b/parmepy/examples/main_Rotating_2D_GH.py new file mode 100644 index 000000000..e15f51324 --- /dev/null +++ b/parmepy/examples/main_Rotating_2D_GH.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +import time +import new_ParMePy as pp +import math + + +def vitesse(x): + return [-math.sin(x[0] * math.pi) ** 2 * math.sin(x[1] * math.pi * 2), + math.sin(x[1] * math.pi) ** 2 * math.sin(x[0] * math.pi * 2)] +#vx(i)=-sin(pix)*sin(pix)*sin(pi2y)*amodul +#vy(i)=+sin(pi2x)*sin(piy)*sin(piy)*amodul + + +def scalaire(x): + rr = math.sqrt((x[0] - 0.5) ** 2 + (x[1] - 0.75) ** 2) + if rr < 0.15: + return 1. + else: + return 0. + + +def run(): + dim = 2 + nb = 1024 + boxLength = [1., 1.] + boxMin = [0., 0.] + nbElem = [nb, nb] + + timeStep = 0.07 + FinalTime = 5. + outputFilePrefix = './res/RK2_' + outputModulo = 1 + + t0 = time.time() + + box = pp.Domain.Box(dimension=dim, + length=boxLength, + minPosition=boxMin) + velo = pp.Variable.AnalyticalVariable(domain=box, + dimension=dim, name="Velocity" + ) + scal = pp.Variable.AnalyticalVariable(domain=box, + dimension=1, + formula=scalaire, scalar=True, name="Scalar" + ) + advec = pp.Operator.Advection(velo, scal) + cTspPb = pp.Problem.ContinuousTransportProblem(advection=advec) + cTspPb.addOperator(pp.Operator.VelocityOp(velo, vitesse, period=24.), 0) + + cTspPb.discretize(dSpec=[nbElem]) + + InterpolationMethod = pp.Utils.Linear(grid=box.discreteDomain, gvalues=velo.discreteVariable) + RemeshingMethod = pp.Utils.M6Prime(grid=box.discreteDomain, gvalues=scal.discreteVariable) + ODESolver = pp.Utils.RK2(InterpolationMethod, box.discreteDomain.applyConditions) + ## cpu + #solver = pp.Utils.ParticularSolver(cTspPb, ODESolver, InterpolationMethod, RemeshingMethod) + ## gpu + #solver = pp.GPUParticularSolver(cTspPb, ODESolver, InterpolationMethod, RemeshingMethod, device_type='gpu') + solver = pp.GPUParticularSolver_GLRender(cTspPb, + ODESolver, InterpolationMethod, RemeshingMethod, + device_type='gpu', dt=timeStep, + src='./examples/main_Rotating_2D_GH_kernels.cl' + ) + cTspPb.setSolver(solver) + cTspPb.setPrinter(pp.Utils.Printer(frequency=outputModulo, outputPrefix=outputFilePrefix, problem=cTspPb)) + + t1 = time.time() + + cTspPb.solve(T=FinalTime, dt=timeStep) + + tf = time.time() + + print "Total time : ", tf - t0, "sec (CPU)" + print "Init time : ", t1 - t0, "sec (CPU)" + print "Solving time : ", tf - t1, "sec (CPU)" + + +if __name__ == "__main__": + run() diff --git a/parmepy/examples/main_Rotating_2D_GH_kernels.cl b/parmepy/examples/main_Rotating_2D_GH_kernels.cl new file mode 100644 index 000000000..6232abf62 --- /dev/null +++ b/parmepy/examples/main_Rotating_2D_GH_kernels.cl @@ -0,0 +1,51 @@ + +__kernel void initScalar(__global float* values, + __private const float t, + __private const float4 min, + __private const uint4 nb, + __private const float4 size) +{ + __private uint ind,ix,iy; + __private float px,py,s; + + ix = get_global_id(0); + iy = get_global_id(1); + + px = min.x + (float)(ix)*size.x; + py = min.y + (float)(iy)*size.y; + + if (sqrt((px-0.5f)*(px-0.5f) + (py-0.75f)*(py-0.75f)) < 0.15f) + s = 1.0f; + else + s = 0.0f; + // Write + ind = iy + ix*nb.y; + values[ind] = s; +} + +// velocity field +__kernel void velocity(__global float* gvelo, + __private const float t, + __private const float period, + __private const float4 min, + __private const uint4 nb, + __private const float4 size + ) +{ + __private uint ix,iy, ind; + __private float px,py,vx,vy; + + ix = get_global_id(0); + iy = get_global_id(1); + + px = min.x + (float)(ix)*size.x; //2flop + py = min.y + (float)(iy)*size.y; //2flop + + vx = -sin(px*M_PI_F)*sin(px*M_PI_F)*sin(2.0f*py*M_PI_F)*cos(t*M_PI_F/period); // 14flop + vy = sin(py*M_PI_F)*sin(py*M_PI_F)*sin(2.0f*px*M_PI_F)*cos(t*M_PI_F/period); // 14flop + + ind = iy + ix*nb.y; + gvelo[ind] = vx; + ind = nb.x*nb.y + ix + iy*nb.x; + gvelo[ind] = vy; +} diff --git a/parmepy/examples/print_volume.gp b/parmepy/examples/print_volume.gp new file mode 100644 index 000000000..a8dd09cef --- /dev/null +++ b/parmepy/examples/print_volume.gp @@ -0,0 +1,19 @@ +reset +set auto + +vol_theo = pi*0.15*0.15 + +set title "Volume en fonction du temps" +set xl 'temps' +set yl 'volume' +set ytics nomirror +set y2l 'erreur relative' +set y2tics +set key outside bottom center maxrows 1 box height 1 +set yr [0:*] + +p'volume.dat' u ($0*0.07):1 w lp t 'volume' axes x1y1,\ + (vol_theo) t'volume théorique' axes x1y1, \ + 'volume.dat' u ($0*0.07):(abs($1-vol_theo)/vol_theo) t 'erreur' w lp axes x1y2 + +l'loop.gp' \ No newline at end of file diff --git a/parmepy/new_ParMePy/Operator/AdvectionDOp.py b/parmepy/new_ParMePy/Operator/AdvectionDOp.py index 746afd0b8..99204c1e2 100644 --- a/parmepy/new_ParMePy/Operator/AdvectionDOp.py +++ b/parmepy/new_ParMePy/Operator/AdvectionDOp.py @@ -31,7 +31,7 @@ class AdvectionDOp(DiscreteOperator): ## Transported scalar. self.scalar = advec.scalar.discreteVariable self.addVariable([self.velocity, self.scalar]) - self.gpu_kernel = None + self.gpu_kernel_name = "advection" self.compute_time = [0., 0., 0.] self.submit_time = 0. self.queued_time = 0. @@ -73,24 +73,53 @@ class AdvectionDOp(DiscreteOperator): else: l = list(self.gpu_shape) nb_ligne = l.pop(splittingDirection) - evt = self.gpu_kernel(self.gpu_queue, tuple(l), None, - self.velocity.gpu_mem_object, - self.scalar.gpu_mem_object, - self.resultPosition.gpu_mem_object, - self.resultScalar.gpu_mem_object, - dtype_real_gpu(t), dtype_real_gpu(dt), dtype_integer(splittingDirection), dtype_integer(nb_ligne), - self.velocity.domain.min.astype(dtype_real_gpu), - self.velocity.domain.length.astype(dtype_real_gpu), - self.velocity.domain.elementNumber.astype(dtype_integer), - self.velocity.domain.elementSize.astype(dtype_real_gpu) - ) + local_wg = list(l) + if len(l) == 2: + local_wg[0] = 1 + local_wg = tuple(local_wg) + else: + local_wg = None + if debug == 0: + evt_copy = cl.enqueue_copy(self.gpu_queue, self.resultScalar.gpu_mem_object, self.scalar.gpu_mem_object) + evt = self.gpu_kernel.advection(self.gpu_queue, tuple(l), local_wg, + self.velocity.gpu_mem_object, + self.scalar.gpu_mem_object, + self.resultPosition.gpu_mem_object, + self.resultScalar.gpu_mem_object, + dtype_real_gpu(t), dtype_real_gpu(dt), dtype_integer(splittingDirection), dtype_integer(nb_ligne), + self.velocity.domain.min.astype(dtype_real_gpu), + self.velocity.domain.length.astype(dtype_real_gpu), + self.velocity.domain.elementNumber.astype(dtype_integer), + self.velocity.domain.elementSize.astype(dtype_real_gpu) + ) + else: + evt = self.gpu_kernel.advection(self.gpu_queue, tuple(l), None, + self.velocity.gpu_mem_object, + self.scalar.gpu_mem_object, + self.resultPosition.gpu_mem_object, + self.resultScalar.gpu_mem_object, + self.debug_float_buffer, self.debug_integer_buffer, + dtype_real_gpu(t), dtype_real_gpu(dt), dtype_integer(splittingDirection), dtype_integer(nb_ligne), + self.velocity.domain.min.astype(dtype_real_gpu), + self.velocity.domain.length.astype(dtype_real_gpu), + self.velocity.domain.elementNumber.astype(dtype_integer), + self.velocity.domain.elementSize.astype(dtype_real_gpu) + ) # self.gpu_queue.finish() # cl.enqueue_copy(self.gpu_queue, self.resultPosition.values, self.resultPosition.gpu_mem_object) # print self.resultPosition.values self.gpu_queue.finish() + if debug > 0: + cl.enqueue_copy(self.gpu_queue, self.debug_float, self.debug_float_buffer) + cl.enqueue_copy(self.gpu_queue, self.debug_integer, self.debug_integer_buffer) + print " :: DEBUG :: ", self.debug_float + print " :: DEBUG :: ", self.debug_integer self.queued_time += (evt.profile.submit - evt.profile.queued) * 1e-9 self.submit_time += (evt.profile.start - evt.profile.submit) * 1e-9 self.compute_time[splittingDirection] += (evt.profile.end - evt.profile.start) * 1e-9 + self.queued_time += (evt_copy.profile.submit - evt_copy.profile.queued) * 1e-9 + self.submit_time += (evt_copy.profile.start - evt_copy.profile.submit) * 1e-9 + self.compute_time[splittingDirection] += (evt_copy.profile.end - evt_copy.profile.start) * 1e-9 self.total_flop += ((self.velocity.domain.dimension-1) + 19 * nb_ligne) * np.prod(l) # 1*(DIM-1) flop d'initialisation + 19flop par particule d'une ligne self.total_bytes_accessed += ((2 + 1 + self.velocity.domain.dimension) * nb_ligne) * np.prod(l) * 4 # 2 floats lus + 1+DIM float écrits #print "Advection:", self.compute_time * 1e-9 diff --git a/parmepy/new_ParMePy/Operator/DiscreteOperator.py b/parmepy/new_ParMePy/Operator/DiscreteOperator.py index eec27590a..c95859de8 100644 --- a/parmepy/new_ParMePy/Operator/DiscreteOperator.py +++ b/parmepy/new_ParMePy/Operator/DiscreteOperator.py @@ -22,6 +22,9 @@ class DiscreteOperator: self.variables = [] ## Operator numerical method. self.numMethod = [] + self.is_splittable = True + self.gpu_kernel = None + self.gpu_kernel_name = "" def addVariable(self, cVariable): """ diff --git a/parmepy/new_ParMePy/Operator/RemeshingDOp.py b/parmepy/new_ParMePy/Operator/RemeshingDOp.py index fb910511d..6942158c4 100644 --- a/parmepy/new_ParMePy/Operator/RemeshingDOp.py +++ b/parmepy/new_ParMePy/Operator/RemeshingDOp.py @@ -44,7 +44,7 @@ class RemeshingDOp(DiscreteOperator): if not self.ptag is None: self.addVariable([self.ptag]) self.numMethod = method - self.gpu_kernel = None + self.gpu_kernel_name = "remeshing" self.compute_time = [0., 0., 0.] self.queued_time = 0. self.submit_time = 0. @@ -77,7 +77,13 @@ class RemeshingDOp(DiscreteOperator): else: l = list(self.gpu_shape) nb_ligne = l.pop(splittingDirection) - evt = self.gpu_kernel(self.gpu_queue, tuple(l), None, + local_wg = list(l) + if len(l) == 2: + local_wg[0] = 1 + local_wg = tuple(local_wg) + else: + local_wg = None + evt = self.gpu_kernel.remeshing(self.gpu_queue, tuple(l), local_wg, self.ppos.gpu_mem_object, self.pscal.gpu_mem_object, self.resultScalar.gpu_mem_object, dtype_real_gpu(t), dtype_real_gpu(dt), dtype_integer(splittingDirection), dtype_integer(nb_ligne), self.resultScalar.domain.min.astype(dtype_real_gpu), diff --git a/parmepy/new_ParMePy/Operator/VelocityDOp.py b/parmepy/new_ParMePy/Operator/VelocityDOp.py new file mode 100644 index 000000000..b35a362a4 --- /dev/null +++ b/parmepy/new_ParMePy/Operator/VelocityDOp.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +""" +@package Operator +Operator representation +""" +from ..Param import * +from DiscreteOperator import DiscreteOperator +import time +import pyopencl as cl + +providedClass = "VelocityDOp" + + +class VelocityDOp(DiscreteOperator): + """ + Advection operator representation. + DiscreteOperator.DiscreteOperator specialization. + """ + + def __init__(self, velo_op): + """ + Constructor. + Create a Transport operator on a given continuous domain. + Work on a given scalar at a given velocity to produce scalar distribution at new positions. + + @param advec : Advection operator. + """ + DiscreteOperator.__init__(self) + self.is_splittable = False + ## Velocity. + self.velocity = velo_op.velocity.discreteVariable + ## Transported scalar. + self.formula = velo_op.formula + self.period = velo_op.period + self.addVariable([self.velocity]) + self.gpu_kernel_name = "velocity" + self.compute_time = [0., 0., 0.] + self.submit_time = 0. + self.queued_time = 0. + self.total_flop = 0 + self.total_bytes_accessed = 0 + + def applyOperator(self, t, dt, splittingDirection): + """ + Apply advection operator. + + @param t : current time. + @param dt : time step for advection. + @param splittingDirection : direction to advect. + """ + if self.gpu_kernel is None: + raise NotImplementedError("No implementation for velocity opérator on CPU.") + else: + evt = self.gpu_kernel.velocity(self.gpu_queue, tuple(self.gpu_shape), None, + self.velocity.gpu_mem_object, + dtype_real_gpu(t), dtype_real_gpu(self.period), + self.velocity.domain.min.astype(dtype_real_gpu), + self.velocity.domain.elementNumber.astype(dtype_integer), + self.velocity.domain.elementSize.astype(dtype_real_gpu) + ) + # self.gpu_queue.finish() + # cl.enqueue_copy(self.gpu_queue, self.resultPosition.values, self.resultPosition.gpu_mem_object) + # print self.resultPosition.values + self.gpu_queue.finish() + #temp_tab = np.zeros_like(self.velocity.values) + #cl.enqueue_copy(self.gpu_queue, temp_tab, self.velocity.gpu_mem_object) + #self.gpu_queue.finish() + #print temp_tab + self.queued_time += (evt.profile.submit - evt.profile.queued) * 1e-9 + self.submit_time += (evt.profile.start - evt.profile.submit) * 1e-9 + self.compute_time[splittingDirection] += (evt.profile.end - evt.profile.start) * 1e-9 + self.total_flop += 32 * np.prod(self.gpu_shape) + self.total_bytes_accessed += 4 * 2 * np.prod(self.gpu_shape) + #print "Advection:", self.compute_time * 1e-9 + + def __str__(self): + """ToString method""" + return "VelocityDOp (DiscreteOperator)" + +if __name__ == "__main__": + print __doc__ + print "- Provided class : VelocityDOp" + print VelocityDOp.__doc__ diff --git a/parmepy/new_ParMePy/Operator/VelocityOp.py b/parmepy/new_ParMePy/Operator/VelocityOp.py new file mode 100644 index 000000000..1d46498f5 --- /dev/null +++ b/parmepy/new_ParMePy/Operator/VelocityOp.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- +""" +@package Operator +Operator representation +""" +from ..Param import * +from ContinuousOperator import ContinuousOperator +from VelocityDOp import VelocityDOp + + +class VelocityOp(ContinuousOperator): + """ + Velocity operator representation + """ + + def __init__(self, velocity, formula, period=10.): + """ + Constructor. + Create an Velocity operator from given variables velocity and scalar. + + @param velocity ContinuousVariable.ContinuousVariable : velocity variable. + @param scalar ContinuousVariable.ContinuousVariable : scalar variable. + """ + ContinuousOperator.__init__(self) + ## advection velocity variable + self.velocity = velocity + ## advected scalar variable + self.formula = formula + self.period = period + self.addVariable([self.velocity]) + + def discretize(self, spec=None): + """ + Velocity operator discretization method. + Create an VelocityDOp.VelocityDOp from given specifications. + + @param spec : discretization specifications, not used. + """ + if self.discreteOperator is None: + self.discreteOperator = VelocityDOp(self) + + def __str__(self): + """ToString method""" + s = " Velocity operator (ContinuousOperator)" + return s + "\n" + +if __name__ == "__main__": + print __doc__ + print "- Provided class : VelocityOp" + print VelocityOp.__doc__ diff --git a/parmepy/new_ParMePy/Operator/VolumeDOp.py b/parmepy/new_ParMePy/Operator/VolumeDOp.py new file mode 100644 index 000000000..0eba6e0a3 --- /dev/null +++ b/parmepy/new_ParMePy/Operator/VolumeDOp.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- +""" +@package Operator +Operator representation +""" +from ..Param import * +from DiscreteOperator import DiscreteOperator +import time +import pyopencl as cl +import pylab as pl + +providedClass = "VolumeDOp" + + +class VolumeDOp(DiscreteOperator): + """ + Advection operator representation. + DiscreteOperator.DiscreteOperator specialization. + """ + + def __init__(self, volume, pscal, pvolume, level=0.5, filename='./volume.dat'): + """ + Constructor. + Create a Transport operator on a given continuous domain. + Work on a given scalar at a given velocity to produce scalar distribution at new positions. + + @param advec : Advection operator. + """ + DiscreteOperator.__init__(self) + self.is_splittable = False + ## Volume. + self.volume = volume + self.pscal = pscal + self.pvolume = pvolume + self.addVariable([self.volume, self.pscal]) + self.level = level + self.volume_evolution = [] + self.gpu_kernel_name = "volume" + self.compute_time = [0., 0., 0.] + self.submit_time = 0. + self.queued_time = 0. + self.total_flop = 0 + self.total_bytes_accessed = 0 + self.file = open(filename, 'w', 1) + + def applyOperator(self, t, dt, splittingDirection): + """ + Apply advection operator. + + @param t : current time. + @param dt : time step for advection. + @param splittingDirection : direction to advect. + """ + if self.gpu_kernel is None: + raise NotImplementedError("No implementation for velocity opérator on CPU.") + else: + l = list(self.gpu_shape) + nb_ligne = l.pop(splittingDirection) + if debug == 0: + evt = self.gpu_kernel.volume(self.gpu_queue, tuple(l), None, + self.pscal.gpu_mem_object, + self.volume.gpu_mem_object, + dtype_real_gpu(self.level), dtype_real_gpu(self.pvolume), dtype_integer(splittingDirection), dtype_integer(nb_ligne), + self.volume.domain.min.astype(dtype_real_gpu), + self.volume.domain.length.astype(dtype_real_gpu), + self.volume.domain.elementNumber.astype(dtype_integer), + self.volume.domain.elementSize.astype(dtype_real_gpu) + ) + else: + evt = self.gpu_kernel.volume(self.gpu_queue, tuple(l), None, + self.pscal.gpu_mem_object, + self.volume.gpu_mem_object, + self.debug_float_buffer, self.debug_integer_buffer, + dtype_real_gpu(self.length), dtype_real_gpu(self.pvolume), dtype_integer(splittingDirection), dtype_integer(nb_ligne), + self.volume.domain.min.astype(dtype_real_gpu), + self.volume.domain.length.astype(dtype_real_gpu), + self.volume.domain.elementNumber.astype(dtype_integer), + self.volume.domain.elementSize.astype(dtype_real_gpu) + ) + self.gpu_queue.finish() + cl.enqueue_copy(self.gpu_queue, self.volume.values, self.volume.gpu_mem_object) + self.file.write(str(self.volume.values.sum())+'\n') + if debug > 0: + cl.enqueue_copy(self.gpu_queue, self.debug_float, self.debug_float_buffer) + cl.enqueue_copy(self.gpu_queue, self.debug_integer, self.debug_integer_buffer) + print " :: DEBUG :: ", self.debug_float + print " :: DEBUG :: ", self.debug_integer + self.queued_time += (evt.profile.submit - evt.profile.queued) * 1e-9 + self.submit_time += (evt.profile.start - evt.profile.submit) * 1e-9 + self.compute_time[splittingDirection] += (evt.profile.end - evt.profile.start) * 1e-9 + self.total_flop += ((self.volume.domain.dimension-1) + 19 * nb_ligne) * np.prod(l) # 1*(DIM-1) flop d'initialisation + 19flop par particule d'une ligne + self.total_bytes_accessed += ((2 + 1 + self.volume.domain.dimension) * nb_ligne) * np.prod(l) * 4 # 2 floats lus + 1+DIM float écrits + #print "Advection:", self.compute_time * 1e-9 + + def __str__(self): + """ToString method""" + return "VolumeDOp (DiscreteOperator)" + +if __name__ == "__main__": + print __doc__ + print "- Provided class : VolumeDOp" + print VolumeDOp.__doc__ diff --git a/parmepy/new_ParMePy/Operator/__init__.py b/parmepy/new_ParMePy/Operator/__init__.py index 8834fdf85..f1513ea05 100644 --- a/parmepy/new_ParMePy/Operator/__init__.py +++ b/parmepy/new_ParMePy/Operator/__init__.py @@ -5,3 +5,4 @@ from AdvectionDOp import AdvectionDOp from InterpolationDOp import InterpolationDOp from RemeshingDOp import RemeshingDOp from TagDOp import TagDOp +from VelocityOp import VelocityOp diff --git a/parmepy/new_ParMePy/Param.py b/parmepy/new_ParMePy/Param.py index 861819a8b..56276c965 100644 --- a/parmepy/new_ParMePy/Param.py +++ b/parmepy/new_ParMePy/Param.py @@ -9,3 +9,4 @@ dtype_real = np.float64 dtype_integer = np.uint32 dtype_real_gpu = np.float32 order = 'C' +debug = 0 diff --git a/parmepy/new_ParMePy/Problem/ContinuousProblem.py b/parmepy/new_ParMePy/Problem/ContinuousProblem.py index d7b683b1f..cce6b951c 100644 --- a/parmepy/new_ParMePy/Problem/ContinuousProblem.py +++ b/parmepy/new_ParMePy/Problem/ContinuousProblem.py @@ -48,17 +48,32 @@ class ContinuousProblem: if self.domains.count(v.domain) == 0: self.domains.append(v.domain) - def addOperator(self, cOperator): + def addOperator(self, cOperator, index=None): """ Add a list of ContinuousOperator.ContinuousOperator to the problem. Also add operators' variables and variables' domains to the problem. @param cOperator ContinuousOperator.ContinuousOperator : list of operators to add. """ - for o in cOperator: - if self.operators.count(o) == 0: - self.operators.append(o) - for v in o.variables: + if isinstance(cOperator, list): + for i,o in enumerate(cOperator): + if self.operators.count(o) == 0: + try: + self.operators.insert(index[i],o) + except TypeError: + self.operators.append(o) + for v in o.variables: + if self.variables.count(v) == 0: + self.variables.append(v) + if self.domains.count(v.domain) == 0: + self.domains.append(v.domain) + else: + if self.operators.count(cOperator) == 0: + try: + self.operators.insert(index, cOperator) + except TypeError: + self.operators.append(cOperator) + for v in cOperator.variables: if self.variables.count(v) == 0: self.variables.append(v) if self.domains.count(v.domain) == 0: diff --git a/parmepy/new_ParMePy/Problem/ContinuousTransportProblem.py b/parmepy/new_ParMePy/Problem/ContinuousTransportProblem.py index dc2ce726d..969a98eed 100644 --- a/parmepy/new_ParMePy/Problem/ContinuousTransportProblem.py +++ b/parmepy/new_ParMePy/Problem/ContinuousTransportProblem.py @@ -27,8 +27,6 @@ class ContinuousTransportProblem(ContinuousProblem): ## Advection operator for the transport problem self.advecOp = advection self.addOperator([self.advecOp]) - if advection != None: - self.addOperator([self.advecOp]) if domains != None: self.addDomain(domains) if variables != None: @@ -61,7 +59,7 @@ class ContinuousTransportProblem(ContinuousProblem): self.discreteProblem = DiscreteTransportProblem(advection=self.advecOp.discreteOperator) self.discreteProblem.addDomain([d.discreteDomain for d in self.domains]) self.discreteProblem.addVariable([v.discreteVariable for v in self.variables]) - self.discreteProblem.addOperator([o.discreteOperator for o in self.operators]) + self.discreteProblem.addOperator([o.discreteOperator for o in self.operators], [self.operators.index(o) for o in self.operators]) def writeToFile(self, fileStr): """ diff --git a/parmepy/new_ParMePy/Problem/DiscreteProblem.py b/parmepy/new_ParMePy/Problem/DiscreteProblem.py index c57b6b612..666f3e7be 100644 --- a/parmepy/new_ParMePy/Problem/DiscreteProblem.py +++ b/parmepy/new_ParMePy/Problem/DiscreteProblem.py @@ -50,17 +50,33 @@ class DiscreteProblem: if self.domains.count(v.domain) == 0: self.domains.append(v.domain) - def addOperator(self, cOperator): + def addOperator(self, cOperator, index=None): """ Add a DiscreteOperator.DiscreteOperator to the problem. Also add operators' variables and variables' domains to the problem. @param cOperator DiscreteOperator.DiscreteOperator : list od operators to add. """ - for o in cOperator: - if self.operators.count(o) == 0: - self.operators.append(o) - for v in o.variables: + if isinstance(cOperator, list): + for i,o in enumerate(cOperator): + if self.operators.count(o) == 0: + try: + self.operators.insert(index[i],o) + except TypeError: + self.operators.append(o) + for v in o.variables: + if self.variables.count(v) == 0: + self.variables.append(v) + if self.domains.count(v.domain) == 0: + self.domains.append(v.domain) + else: + if self.operators.count(cOperator) == 0: + print index + try: + self.operators.insert(index, cOperator) + except TypeError: + self.operators.append(cOperator) + for v in cOperator.variables: if self.variables.count(v) == 0: self.variables.append(v) if self.domains.count(v.domain) == 0: diff --git a/parmepy/new_ParMePy/Problem/DiscreteTransportProblem.py b/parmepy/new_ParMePy/Problem/DiscreteTransportProblem.py index dc5b5e898..438e6df5b 100644 --- a/parmepy/new_ParMePy/Problem/DiscreteTransportProblem.py +++ b/parmepy/new_ParMePy/Problem/DiscreteTransportProblem.py @@ -38,7 +38,7 @@ class DiscreteTransportProblem(DiscreteProblem): self.splittingStep = None def step(self, dt): - [[op.applyOperator(self.t, dt * i[1], i[0]) for op in self.operators] for i in self.splittingStep] + [c_step[0].applyOperator(self.t, dt * c_step[1], c_step[2]) for c_step in self.operator_list] self.t += dt def solve(self, T, dt): @@ -49,18 +49,21 @@ class DiscreteTransportProblem(DiscreteProblem): @param T : Simulation final time. @param dt : Simulation time step. """ + print "\n=== Solving ===" ite = 0 if not self.printer is None: while self.t < T: self.step(dt) ite += 1 - self.printer.printStep() #print "iteration ", ite, "\t t =", self.t + self.printer.printStep() else: while self.t < T: self.step(dt) ite += 1 #print "Iteration ", ite, "\t t =", self.t, "\t(No output mode)" + print "\n=== End Solving ===" + print "\n=== Timings ===" c_time = 0. q_time = 0. s_time = 0. diff --git a/parmepy/new_ParMePy/Utils/GPUParticularSolver.py b/parmepy/new_ParMePy/Utils/GPUParticularSolver.py index 04ef7801e..8e0442e1f 100644 --- a/parmepy/new_ParMePy/Utils/GPUParticularSolver.py +++ b/parmepy/new_ParMePy/Utils/GPUParticularSolver.py @@ -25,13 +25,16 @@ class GPUParticularSolver(ParticularSolver): InterpolationMethod, RemeshingMethod, splittingMethod='strang', - platform_id=0, device_id=0, device_type='gpu'): + platform_id=0, device_id=0, + device_type='gpu', + src=None): """ Constructor. Create a solver description. """ ParticularSolver.__init__(self, problem, ODESolver, InterpolationMethod, RemeshingMethod, splittingMethod) - self.discreteProblem = problem.discreteProblem + self.parameters_initialization() + self.user_src = src #Get platform. self.platform = cl.get_platforms()[platform_id] #Get device. @@ -45,6 +48,7 @@ class GPUParticularSolver(ParticularSolver): #Create CommandQueue on the GPU Context self.queue = cl.CommandQueue(self.ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) + def parameters_initialization(self): # Modifying domains parameters to fit with float4 or int4 opencl objects self.gpu_shape = tuple(self.grid.elementNumber) padding_float = np.zeros(4 - self.grid.dimension, dtype=dtype_real_gpu) @@ -53,84 +57,168 @@ class GPUParticularSolver(ParticularSolver): self.grid.max = np.concatenate((self.grid.max, padding_float)).astype(dtype_real_gpu) self.grid.length = np.concatenate((self.grid.length, padding_float)).astype(dtype_real_gpu) self.grid.elementSize = np.concatenate((self.grid.elementSize, padding_float)).astype(dtype_real_gpu) - self.grid.elementNumber = np.concatenate((self.grid.elementNumber, padding_uint)).astype(dtype_integer) - self.gvelo.values = np.asarray(self.gvelo.values, dtype=dtype_real_gpu) - self.gscal.values = np.asarray(self.gscal.values, dtype=dtype_real_gpu) - self.ppos.values = np.asarray(self.ppos.values, dtype=dtype_real_gpu) - self.pscal.values = np.asarray(self.pscal.values, dtype=dtype_real_gpu) + self.grid.elementNumber = np.concatenate((self.grid.elementNumber, padding_uint+1)).astype(dtype_integer) - # OpenCL Buffer allocations + def buffer_allocations(self): self.gpu_used_mem = 0 - self.gvelo.gpu_mem_object = cl.Buffer(self.ctx, - cl.mem_flags.READ_WRITE, - size=self.gvelo.values.nbytes) - self.gpu_used_mem += self.gvelo.gpu_mem_object.size - print "Allocation grid velocity on gpu, size:", self.gvelo.gpu_mem_object.size, 'B' - self.gscal.gpu_mem_object = cl.Buffer(self.ctx, - cl.mem_flags.READ_WRITE, - size=self.gscal.values.nbytes) - self.gpu_used_mem += self.gscal.gpu_mem_object.size - print "Allocation grid scalar on gpu, size:", self.gscal.gpu_mem_object.size, 'B' - self.ppos.gpu_mem_object = cl.Buffer(self.ctx, - cl.mem_flags.READ_WRITE, - size=self.ppos.values.nbytes) - self.gpu_used_mem += self.ppos.gpu_mem_object.size - print "Allocation particles positions on gpu, size:", self.ppos.gpu_mem_object.size, 'B' - self.pscal.gpu_mem_object = cl.Buffer(self.ctx, - cl.mem_flags.READ_WRITE, - size=self.pscal.values.nbytes) - self.gpu_used_mem += self.pscal.gpu_mem_object.size - print "Allocation particles scalar on gpu, size:", self.pscal.gpu_mem_object.size, 'B' + for v in self.discreteProblem.variables: + v.values = np.asarray(v.values, dtype=dtype_real_gpu) + v.gpu_mem_object = cl.Buffer(self.ctx, + cl.mem_flags.READ_WRITE, + size=v.values.nbytes + ) + self.gpu_used_mem += v.gpu_mem_object.size + print "Allocation " + v.name + " on gpu, size:", v.gpu_mem_object.size * 1e-6, 'MBytes' + + if debug > 0: + debug_size = 0 + self.discreteProblem.advecOp.debug_integer = np.zeros(debug, dtype=dtype_integer) + self.discreteProblem.advecOp.debug_integer_buffer = cl.Buffer( + self.ctx, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, + hostbuf=self.discreteProblem.advecOp.debug_integer) + debug_size += self.discreteProblem.advecOp.debug_integer_buffer.size + self.discreteProblem.advecOp.debug_float = np.zeros(debug, dtype=dtype_real_gpu) + self.discreteProblem.advecOp.debug_float_buffer = cl.Buffer( + self.ctx, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, + hostbuf=self.discreteProblem.advecOp.debug_float) + debug_size += self.discreteProblem.advecOp.debug_float_buffer.size + self.gpu_used_mem += debug_size + print "Allocation debug buffer on gpu, size:", debug_size, 'B' + + def initialize(self): + """ + Solver initialisation for a given DiscreteProblem.DiscreteProblem. + + @param discreteProblem : Problem to initialize. + """ + # OpenCL Buffer allocations + print "\n=== Device memory allocations ===" + self.buffer_allocations() p = self.gpu_used_mem * 100 / (self.device.global_mem_size * 1.) - print "Total memory allocated on gpu :", self.gpu_used_mem, "B (", p, " % of total available memory)" + print "Total memory allocated on gpu :", self.gpu_used_mem * 1e-6, "MBytes (", p, " % of total available memory)" # Kernels creation/compilation + print "\n=== Kernel sources compiling ===" f = open(os.path.join(os.path.split(os.path.abspath(__file__))[0], "gpu_src.cl"), 'r') self.gpu_src = "".join(f.readlines()) f.close() + if not self.user_src is None: + f = open(self.user_src, 'r') + self.gpu_src += "".join(f.readlines()) + f.close() build_options = "-cl-single-precision-constant -cl-opt-disable" build_options += " -D DIM=" + str(self.dim) + build_options += " -D DEBUG=" + str(debug) build_options += " -D MAX_LINE_POINTS=" + str(np.max(self.grid.elementNumber)) self.prg = cl.Program(self.ctx, self.gpu_src).build(build_options) print self.prg.get_build_info(self.device, cl.program_build_info.LOG) print self.prg.get_build_info(self.device, cl.program_build_info.OPTIONS) print self.prg.get_build_info(self.device, cl.program_build_info.STATUS) - self.discreteProblem.advecOp.gpu_queue = self.queue - self.discreteProblem.advecOp.gpu_shape = self.gpu_shape - self.discreteProblem.advecOp.gpu_kernel = self.prg.advection - self.remesh.gpu_queue = self.queue - self.remesh.gpu_shape = self.gpu_shape - self.remesh.gpu_kernel = self.prg.remeshing - def initialize(self): - """ - Solver initialisation for a given DiscreteProblem.DiscreteProblem. + for op in self.discreteProblem.operators: + for k in self.prg.all_kernels(): + if op.gpu_kernel_name == k.get_info(cl.kernel_info.FUNCTION_NAME): + op.gpu_kernel = self.prg + op.gpu_queue = self.queue + op.gpu_shape = self.gpu_shape - @param discreteProblem : Problem to initialize. - """ - self.gvelo.init() - self.gscal.init() - evt_gvelo = cl.enqueue_copy(self.queue, self.gvelo.gpu_mem_object, self.gvelo.values) - evt_gscal = cl.enqueue_copy(self.queue, self.gscal.gpu_mem_object, self.gscal.values) - evt_ppos = cl.enqueue_copy(self.queue, self.ppos.gpu_mem_object, self.ppos.values) - evt_pscal = cl.enqueue_copy(self.queue, self.pscal.gpu_mem_object, self.pscal.values) + print "\n=== Data initialization ===" + for v in self.discreteProblem.variables: + v_is_init = False + print v.name, + for i,k in enumerate(self.prg.all_kernels()): + if ('init' + v.name) == k.get_info(cl.kernel_info.FUNCTION_NAME): + print 'Initialization Kernel found ', + evt = eval("self.prg."+"init"+v.name)(self.queue, + self.gpu_shape, + None, + v.gpu_mem_object, + dtype_real_gpu(0.), + v.domain.min.astype(dtype_real_gpu), + v.domain.elementNumber.astype(dtype_integer), + v.domain.elementSize.astype(dtype_real_gpu) + ) + self.queue.finish() + print 'compute time :', (evt.profile.end - evt.profile.start) * 1e-9, 's', + v.contains_data, v_is_init = False, True + if not v_is_init: + v.init() + print '... Done' + + print "\n=== Data Transfers host->device ===" + copy_evts = [] + transfered_size = 0 + for v in self.discreteProblem.variables: + if v.contains_data: + print v.name, + if v.dimension == self.dim: + print "Memoire arrangée", + offset = 0 + evt = cl.enqueue_copy(self.queue, v.gpu_mem_object, + v.values[..., 0].ravel(), + device_offset=np.long(offset)) + copy_evts.append(evt) + offset += v.values[..., 0].nbytes + evt = cl.enqueue_copy(self.queue, v.gpu_mem_object, + v.values[..., 1].swapaxes(1, 0).ravel(), + device_offset=np.long(offset)) + copy_evts.append(evt) + if self.dim == 3: + offset += v.values[..., 1].nbytes + evt = cl.enqueue_copy(self.queue, v.gpu_mem_object, + v.values[..., 2].swapaxes(1, 0).swapaxes(2, 0).ravel(), + device_offset=np.long(offset)) + copy_evts.append(evt) + else: + print "Normal layout", + evt = cl.enqueue_copy(self.queue, v.gpu_mem_object, v.values) + copy_evts.append(evt) + print "Transfering", v.values.nbytes * 1e-6, "MBytes ..." + transfered_size += v.values.nbytes self.queue.finish() - t = 0. - for evt in [evt_gvelo, evt_gscal, evt_ppos, evt_pscal]: - t += (evt.profile.end - evt.profile.start) - print "Transfering host -> device :", t * 1e-9, "s \t", self.gpu_used_mem / t, "GB/s" + if len(copy_evts)>0: + t = 0. + for evt in copy_evts: + t += (evt.profile.end - evt.profile.start) + print "Transfering host -> device :", t * 1e-9, "s \t", transfered_size / t, "GB/s" + else: + print "No transfers" def collect_data(self): self.queue.finish() - evt_gvelo = cl.enqueue_copy(self.queue, self.gvelo.values, self.gvelo.gpu_mem_object) - evt_gscal = cl.enqueue_copy(self.queue, self.gscal.values, self.gscal.gpu_mem_object) - evt_ppos = cl.enqueue_copy(self.queue, self.ppos.values, self.ppos.gpu_mem_object) - evt_pscal = cl.enqueue_copy(self.queue, self.pscal.values, self.pscal.gpu_mem_object) + print "\n=== Data Transfers device->host ===" + copy_evts = [] + transfered_size = 0 + for v in self.discreteProblem.variables: + if v.contains_data: + print v.name, + if v.dimension == self.dim: + print "Memoire arrangée", + temp = np.empty(self.gpu_shape, dtype=dtype_real_gpu) + offset = 0 + evt = cl.enqueue_copy(self.queue, temp, v.gpu_mem_object, device_offset=np.long(offset)) + copy_evts.append(evt) + v.values[..., 0] = temp.reshape(self.gpu_shape) + offset += temp.nbytes + evt = cl.enqueue_copy(self.queue, temp, v.gpu_mem_object, device_offset=np.long(offset)) + copy_evts.append(evt) + v.values[..., 1] = temp.reshape(self.gpu_shape).swapaxes(1, 0) + if self.dim == 3: + offset += temp.nbytes + evt = cl.enqueue_copy(self.queue, temp, v.gpu_mem_object, device_offset=np.long(offset)) + copy_evts.append(evt) + v.values[..., 2] = temp.reshape(self.gpu_shape).swapaxes(2, 0).swapaxes(1, 0) + else: + print "Normal layout", + evt = cl.enqueue_copy(self.queue, v.values, v.gpu_mem_object) + copy_evts.append(evt) + print "Transfering", v.values.nbytes * 1e-6, "MBytes ..." + transfered_size += v.values.nbytes self.queue.finish() t = 0. - for evt in [evt_gvelo, evt_gscal, evt_ppos, evt_pscal]: + for evt in copy_evts: t += (evt.profile.end - evt.profile.start) - print "Transfering device -> host :", t * 1e-9, "s \t", self.gpu_used_mem/t, "GB/s" + print "Transfering device -> host :", t * 1e-9, "s \t", transfered_size / t, "GB/s" def terminate(self): self.queue.finish() diff --git a/parmepy/new_ParMePy/Utils/GPUParticularSolver_GLRender.py b/parmepy/new_ParMePy/Utils/GPUParticularSolver_GLRender.py index 957f9757f..f9fa2e8ba 100644 --- a/parmepy/new_ParMePy/Utils/GPUParticularSolver_GLRender.py +++ b/parmepy/new_ParMePy/Utils/GPUParticularSolver_GLRender.py @@ -8,18 +8,21 @@ import sys import pyopencl as cl from ..Param import * from ParticularSolver import ParticularSolver +from GPUParticularSolver import GPUParticularSolver from ..Domain.ParticleField import ParticleField from ..Variable.DiscreteVariable import DiscreteVariable from ..Operator.RemeshingDOp import RemeshingDOp from ..Operator.TagDOp import TagDOp from ..Operator.InterpolationDOp import InterpolationDOp from ..Operator.RemeshingDOp import RemeshingDOp +from ..Operator.VolumeDOp import VolumeDOp from OpenGL.GL import * from OpenGL.GLU import * from OpenGL.GLUT import * +import time -class GPUParticularSolver_GLRender(ParticularSolver): +class GPUParticularSolver_GLRender(GPUParticularSolver): """ GPU Particular solver description. Link with differents numericals methods used. Prepare GPU side (memory, kernels, ...) @@ -29,55 +32,30 @@ class GPUParticularSolver_GLRender(ParticularSolver): InterpolationMethod, RemeshingMethod, splittingMethod='strang', - platform_id=0, device_id=0, device_type='gpu', dt=1.): + platform_id=0, + device_id=0, + device_type='gpu', + dt=1., + src=None): """ Constructor. Create a solver description. """ ParticularSolver.__init__(self, problem, ODESolver, InterpolationMethod, RemeshingMethod, splittingMethod) - self.discreteProblem = problem.discreteProblem self.dt = dt + self.parameters_initialization() + self.user_src = src - # Modifying domains parameters to fit with float4 or int4 opencl objects - self.gpu_shape = tuple(self.grid.elementNumber) - padding_float = np.zeros(4 - self.grid.dimension, dtype=dtype_real_gpu) - padding_uint = np.zeros(4 - self.grid.dimension, dtype=dtype_integer) - self.grid.min = np.concatenate((self.grid.min, padding_float)).astype(dtype_real_gpu) - self.grid.max = np.concatenate((self.grid.max, padding_float)).astype(dtype_real_gpu) - self.grid.length = np.concatenate((self.grid.length, padding_float)).astype(dtype_real_gpu) - self.grid.elementSize = np.concatenate((self.grid.elementSize, padding_float)).astype(dtype_real_gpu) - self.grid.elementNumber = np.concatenate((self.grid.elementNumber, padding_uint)).astype(dtype_integer) - self.gvelo.values = np.asarray(self.gvelo.values, dtype=dtype_real_gpu) - self.gscal.values = np.asarray(self.gscal.values, dtype=dtype_real_gpu) - self.ppos.values = np.asarray(self.ppos.values, dtype=dtype_real_gpu) - self.pscal.values = np.asarray(self.pscal.values, dtype=dtype_real_gpu) + self.volume = DiscreteVariable(domain=self.grid, spec=self.gpu_shape[1:], name="Volume") + self.volumeDOp = VolumeDOp(self.volume, self.pscal, + np.prod(self.grid.elementSize[0:self.dim]), level=0.5) + self.discreteProblem.addOperator(self.volumeDOp) + self.discreteProblem.operator_list.append((self.volumeDOp, 1.0, 0)) - #mouse handling for transforming scene - self.mouse_down = False - self.mouse_old = np.array([0., 0.]) - self.rotate = np.array([0., 0., 0.]) - self.translate = -(self.grid.min + self.grid.length / 2.)[0:3] - self.initrans = np.array([0., 0., -2.]) glutInit(sys.argv) - width, height = 800, 800 - glutInitWindowSize(width, height) + glutInitWindowSize(self.width, self.height) glutInitWindowPosition(0, 0) - glutCreateWindow('OpenCL/OpenGL Parmepy (' + str(np.prod(self.grid.elementNumber)) + ' particles)') - glutDisplayFunc(self.display) - - glViewport(0, 0, width, height) - glMatrixMode(GL_PROJECTION) - glLoadIdentity() - gluPerspective(60.0, width / float(height), .1, 8192) - glMatrixMode(GL_MODELVIEW) - - #handle user input - glutKeyboardFunc(self.on_key) - glutMouseFunc(self.on_click) - glutMotionFunc(self.on_mouse_motion) - - #this will call draw every 100ms - glutTimerFunc(100, self.timer, 100) + glutCreateWindow('OpenCL/OpenGL Parmepy (' + str(np.prod(self.grid.elementNumber[0:self.grid.dimension])) + ' particles)') from pyopencl.tools import get_gl_sharing_context_properties if sys.platform == "darwin": @@ -98,12 +76,11 @@ class GPUParticularSolver_GLRender(ParticularSolver): self.device = self.ctx.devices[0] self.queue = cl.CommandQueue(self.ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) - glClearColor(1, 1, 1, 1) - glColor(0, 0, 1) - - self.vbo_ppos = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, self.vbo_ppos) - glBufferData(GL_ARRAY_BUFFER, self.ppos.values.nbytes, None, GL_DYNAMIC_DRAW) + self.points = DiscreteVariable(domain=self.parts, dimension=self.dim) + self.points.values = np.asarray(self.points.values, dtype=dtype_real_gpu) + self.vbo_points = glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, self.vbo_points) + glBufferData(GL_ARRAY_BUFFER, self.points.values.nbytes, None, GL_STREAM_DRAW) glEnableClientState(GL_VERTEX_ARRAY) glVertexPointer(self.dim, GL_FLOAT, 0, None) @@ -111,58 +88,34 @@ class GPUParticularSolver_GLRender(ParticularSolver): self.pcolor.values = np.asarray(self.pcolor.values, dtype=dtype_real_gpu) self.vbo_color = glGenBuffers(1) glBindBuffer(GL_ARRAY_BUFFER, self.vbo_color) - glBufferData(GL_ARRAY_BUFFER, self.pcolor.values.nbytes, None, GL_DYNAMIC_DRAW) + glBufferData(GL_ARRAY_BUFFER, self.pcolor.values.nbytes, None, GL_STREAM_DRAW) glEnableClientState(GL_COLOR_ARRAY) glColorPointer(4, GL_FLOAT, 0, None) - # OpenCL Buffer allocations - self.gpu_used_mem = 0 - self.gvelo.gpu_mem_object = cl.Buffer(self.ctx, - cl.mem_flags.READ_WRITE, - size=self.gvelo.values.nbytes) - self.gpu_used_mem += self.gvelo.gpu_mem_object.size - print "Allocation grid velocity on gpu, size:", self.gvelo.gpu_mem_object.size, 'B' - self.gscal.gpu_mem_object = cl.Buffer(self.ctx, - cl.mem_flags.READ_WRITE, - size=self.gscal.values.nbytes) - self.gpu_used_mem += self.gscal.gpu_mem_object.size - print "Allocation grid scalar on gpu, size:", self.gscal.gpu_mem_object.size, 'B' - self.ppos.gpu_mem_object = cl.GLBuffer(self.ctx, + def parameters_initialization(self): + GPUParticularSolver.parameters_initialization(self) + #mouse handling for transforming scene + self.mouse_down = False + self.mouse_old = np.array([0., 0.]) + self.rotate = np.array([0., 0., 0.]) + self.translate = -(self.grid.min + self.grid.length / 2.)[0:3] + self.initrans = np.array([0., 0., -2.]) + self.width, self.height = 800, 800 + + def buffer_allocations(self): + GPUParticularSolver.buffer_allocations(self) + # GL buffer allocation + self.points.gpu_mem_object = cl.GLBuffer(self.ctx, cl.mem_flags.READ_WRITE, - int(self.vbo_ppos)) + int(self.vbo_points)) self.gpu_used_mem += self.ppos.gpu_mem_object.size - print "Allocation particles positions on gpu, size:", self.ppos.gpu_mem_object.size, 'B' - self.pscal.gpu_mem_object = cl.Buffer(self.ctx, - cl.mem_flags.READ_WRITE, - size=self.pscal.values.nbytes) - self.gpu_used_mem += self.pscal.gpu_mem_object.size - print "Allocation particles scalar on gpu, size:", self.pscal.gpu_mem_object.size, 'B' - self.pvelo.gpu_mem_object = cl.GLBuffer(self.ctx, + print "Allocation points colors to render, size:", self.ppos.gpu_mem_object.size, 'B' + self.pcolor.gpu_mem_object = cl.GLBuffer(self.ctx, cl.mem_flags.READ_WRITE, int(self.vbo_color)) self.gpu_used_mem += self.pscal.gpu_mem_object.size print "Allocation particles color on gpu, size:", self.pscal.gpu_mem_object.size, 'B' - p = self.gpu_used_mem * 100 / (self.device.global_mem_size * 1.) - print "Total memory allocated on gpu :", self.gpu_used_mem, "B (", p, " % of total available memory)" - self.gl_objects = [self.ppos.gpu_mem_object, self.pvelo.gpu_mem_object] - - # Kernels creation/compilation - f = open(os.path.join(os.path.split(os.path.abspath(__file__))[0], "gpu_src.cl"), 'r') - self.gpu_src = "".join(f.readlines()) - f.close() - build_options = "-cl-single-precision-constant -cl-opt-disable" - build_options += " -D DIM=" + str(self.dim) - build_options += " -D MAX_LINE_POINTS=" + str(np.max(self.grid.elementNumber)) - self.prg = cl.Program(self.ctx, self.gpu_src).build(build_options) - print self.prg.get_build_info(self.device, cl.program_build_info.LOG) - print self.prg.get_build_info(self.device, cl.program_build_info.OPTIONS) - print self.prg.get_build_info(self.device, cl.program_build_info.STATUS) - self.discreteProblem.advecOp.gpu_queue = self.queue - self.discreteProblem.advecOp.gpu_shape = self.gpu_shape - self.discreteProblem.advecOp.gpu_kernel = self.prg.advection - self.remesh.gpu_queue = self.queue - self.remesh.gpu_shape = self.gpu_shape - self.remesh.gpu_kernel = self.prg.remeshing + self.gl_objects = [self.points.gpu_mem_object, self.pcolor.gpu_mem_object] ###GL CALLBACKS def timer(self, t): @@ -197,11 +150,28 @@ class GPUParticularSolver_GLRender(ParticularSolver): def display(self): cl.enqueue_acquire_gl_objects(self.queue, self.gl_objects) + display_step = time.time() - self.display_clock + self.display_clock = time.time() + t = time.time() self.discreteProblem.step(self.dt) - self.queue.finish() - self.prg.colorize(self.queue, self.gpu_shape, self.pscal.gpu_mem_object, self.pvelo.gpu_mem_object) + l = list(self.gpu_shape) + splitting_dir = 1 + nb_ligne = l.pop(splitting_dir) + self.prg.colorize(self.queue, tuple(l), None, + self.pscal.gpu_mem_object, + self.pcolor.gpu_mem_object, + self.points.gpu_mem_object, + dtype_integer(splitting_dir), + dtype_integer(nb_ligne), + self.grid.min, + self.grid.elementNumber.astype(dtype_integer), + self.grid.elementSize + ) + glutSetWindowTitle("OpenCL/OpenGL Parmepy T={0:8.4f} ( {1} particles)".format( + self.discreteProblem.t, np.prod(self.grid.elementNumber[0:self.grid.dimension]))) cl.enqueue_release_gl_objects(self.queue, self.gl_objects) self.queue.finish() + print "Compute time :", (time.time() - t) * 1e3, "ms, \t over ", display_step * 1e3, "ms, \t", 1. / (time.time() - t), "fps" glFlush() glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT) @@ -221,7 +191,7 @@ class GPUParticularSolver_GLRender(ParticularSolver): glBindBuffer(GL_ARRAY_BUFFER, self.vbo_color) glColorPointer(4, GL_FLOAT, 0, None) - glBindBuffer(GL_ARRAY_BUFFER, self.vbo_ppos) + glBindBuffer(GL_ARRAY_BUFFER, self.vbo_points) glVertexPointer(self.dim, GL_FLOAT, 0, None) glEnableClientState(GL_VERTEX_ARRAY) @@ -254,34 +224,31 @@ class GPUParticularSolver_GLRender(ParticularSolver): @param discreteProblem : Problem to initialize. """ - self.gvelo.init() - self.gscal.init() - evt_gvelo = cl.enqueue_copy(self.queue, self.gvelo.gpu_mem_object, self.gvelo.values) - evt_gscal = cl.enqueue_copy(self.queue, self.gscal.gpu_mem_object, self.gscal.values) - evt_ppos = cl.enqueue_copy(self.queue, self.ppos.gpu_mem_object, self.ppos.values) - evt_pscal = cl.enqueue_copy(self.queue, self.pscal.gpu_mem_object, self.pscal.values) - self.queue.finish() - t = 0. - for evt in [evt_gvelo, evt_gscal, evt_ppos, evt_pscal]: - t += (evt.profile.end - evt.profile.start) - print "Transfering host -> device :", t * 1e-9, "s \t", self.gpu_used_mem / t, "GB/s" - print "Début de la boucle GLUT. ('q' ou 'ESC' pour quiter)" - self.discreteProblem.solve = glutMainLoop() + GPUParticularSolver.initialize(self) - def collect_data(self): - self.queue.finish() - evt_gvelo = cl.enqueue_copy(self.queue, self.gvelo.values, self.gvelo.gpu_mem_object) - evt_gscal = cl.enqueue_copy(self.queue, self.gscal.values, self.gscal.gpu_mem_object) - evt_ppos = cl.enqueue_copy(self.queue, self.ppos.values, self.ppos.gpu_mem_object) - evt_pscal = cl.enqueue_copy(self.queue, self.pscal.values, self.pscal.gpu_mem_object) - self.queue.finish() - t = 0. - for evt in [evt_gvelo, evt_gscal, evt_ppos, evt_pscal]: - t += (evt.profile.end - evt.profile.start) - print "Transfering device -> host :", t * 1e-9, "s \t", self.gpu_used_mem/t, "GB/s" + glutDisplayFunc(self.display) + glViewport(0, 0, self.width, self.height) + glMatrixMode(GL_PROJECTION) + glLoadIdentity() + gluPerspective(np.max(self.grid.length) * 30., self.width / float(self.height), 0.1, 4.) + glMatrixMode(GL_MODELVIEW) - def terminate(self): - self.queue.finish() + #handle user input + glutKeyboardFunc(self.on_key) + glutMouseFunc(self.on_click) + glutMotionFunc(self.on_mouse_motion) + + #this will call draw every 10ms + self.display_clock = time.time() + self.display_time_step = 10 + glutTimerFunc(self.display_time_step, self.timer, self.display_time_step) + + glClearColor(1, 1, 1, 1) + glColor(0, 0, 1) + + print "\n=== Solving ===" + print "Début de la boucle GLUT. ('q' ou 'ESC' pour quiter)" + self.discreteProblem.solve = glutMainLoop() def __str__(self): """ToString method""" diff --git a/parmepy/new_ParMePy/Utils/ParticularSolver.py b/parmepy/new_ParMePy/Utils/ParticularSolver.py index c61fa2660..714c18f9e 100644 --- a/parmepy/new_ParMePy/Utils/ParticularSolver.py +++ b/parmepy/new_ParMePy/Utils/ParticularSolver.py @@ -44,10 +44,10 @@ class ParticularSolver(Solver): self.gvelo = self.discreteProblem.advecOp.velocity self.gscal = self.discreteProblem.advecOp.scalar self.parts = ParticleField(self.grid) - self.ppos = DiscreteVariable(domain=self.parts, dimension=self.gvelo.dimension) + self.ppos = DiscreteVariable(domain=self.parts, dimension=self.gvelo.dimension, name="Particle position") self.parts.setPositions(self.ppos) - self.pvelo = DiscreteVariable(domain=self.parts, dimension=self.gvelo.dimension) - self.pscal = DiscreteVariable(domain=self.parts, dimension=self.gscal.dimension, scalar=True) + self.pvelo = DiscreteVariable(domain=self.parts, dimension=self.gvelo.dimension, name="Particle velocity") + self.pscal = DiscreteVariable(domain=self.parts, dimension=self.gscal.dimension, scalar=True, name="Particle scalar") #self.pbloc = DiscreteVariable(domain=self.parts, dimension=1, integer=True, initialValue=lambda x: 0, scalar=True) #self.ptag = DiscreteVariable(domain=self.parts, dimension=1, integer=True, initialValue=lambda x: 0, scalar=True) # Interpolation : particles positions, grid velocity -> particles velocity @@ -83,7 +83,9 @@ class ParticularSolver(Solver): [splitting.append((self.dim - 2 - i, 0.5)) for i in xrange(self.dim - 1)] else: raise ValueError("Unknown splitting method : ", self.splittingMethod) - self.discreteProblem.splittingStep = splitting + self.discreteProblem.operator_list = [] + [self.discreteProblem.operator_list.append((op, 1.0, 0)) for op in self.discreteProblem.operators if not op.is_splittable] + [[self.discreteProblem.operator_list.append((op, i[1], i[0])) for op in self.discreteProblem.operators if op.is_splittable] for i in splitting] def initialize(self): """ @@ -111,7 +113,6 @@ class ParticularSolver(Solver): self.gvelo.init() self.gscal.init() - def __str__(self): """ToString method""" s = " Particular solver " diff --git a/parmepy/new_ParMePy/Utils/Printer.py b/parmepy/new_ParMePy/Utils/Printer.py index 376775087..787773559 100644 --- a/parmepy/new_ParMePy/Utils/Printer.py +++ b/parmepy/new_ParMePy/Utils/Printer.py @@ -43,14 +43,14 @@ class Printer(): def _printStep(self): self.skip -= 1 if self.skip == 0: + if isinstance(self.problem.discreteProblem.solver, GPUParticularSolver) and self.ite > 0: + self.problem.discreteProblem.solver.collect_data() print "Writing file : " + self.outputPrefix + "results_{0:05d}.dat".format(self.ite) self.skip = self.freq f = open(self.outputPrefix + "results_{0:05d}.dat".format(self.ite), 'w') dim = self.grid.dimension f.write("# iteration : {0}, time = {1}".format(self.ite, self.problem.discreteProblem.t)) f.write("# gid({0}) gpos({1}) gvelo({2}) gscal(1) ppos({3}) pvelo({4}) pscal(1) pbloc(1) ptag(1)\n".format(dim, dim, dim, dim, dim)) - if isinstance(self.problem.discreteProblem.solver, GPUParticularSolver): - self.problem.discreteProblem.solver.collect_data() if self.grid.dimension == 1: for x in xrange(self.grid.elementNumber[0]): f.write(" {0:6d}".format(x)) diff --git a/parmepy/new_ParMePy/Utils/gpu_src.cl b/parmepy/new_ParMePy/Utils/gpu_src.cl index 8be8bc5c3..7c277193e 100644 --- a/parmepy/new_ParMePy/Utils/gpu_src.cl +++ b/parmepy/new_ParMePy/Utils/gpu_src.cl @@ -1,3 +1,4 @@ + __private float fmod_rtn(__private float x, __private float y); __private float remquo_rtn(__private float x, __private float y, __private int *quo); void advection_init_3_0(__private float4 size, __private float *line_c); @@ -151,11 +152,8 @@ __kernel void advection(__global const float* gvelo, __private const float4 size ) { - __private uint i; // loop index __private uint p; // Particle line loop index __private uint stride_vec; // Buffer stride between two components of the same particle - __private uint stride_along_dir, stride_along_dir_scal; // Buffer stride between two particle of the same line - __private uint ind_line, ind_line_scal; // Line begin buffer index __private uint i_ind; // Interpolation left point buffer index __private uint i_ind_p; // Interpolation right point buffer index __private int ind_c; // Interpolation grid point @@ -165,6 +163,7 @@ __kernel void advection(__global const float* gvelo, __private float ppos_c; // Particle position temp __private float line_c[DIM]; // Particle final position __private float gvelo_buffer_loc[MAX_LINE_POINTS]__attribute__((aligned)); // Local buffer to store gvelo for the current line + __private uint ind_m, stride_m; // Compute strides and index. Depend on array order (C order here) stride_vec = 1; // Les composantes des vecteurs sont à la suite dans les buffers dx_dir = size[dir]; @@ -172,43 +171,46 @@ __kernel void advection(__global const float* gvelo, { if (dir == 0) { - kernel_init_3_0(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + //kernel_init_3_0(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); advection_init_3_0(size, line_c); + ind_m = (nb[0]*nb[1]*nb[2])*dir+get_global_id(0)*nb[2]+get_global_id(1); + stride_m = nb[1]*nb[2]; } else if (dir == 1) { - kernel_init_3_1(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + //kernel_init_3_1(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); advection_init_3_1(size, line_c); + ind_m = (nb[0]*nb[1]*nb[2])*dir+get_global_id(0)*nb[2]+get_global_id(1); + stride_m = nb[0]*nb[2]; } else { - kernel_init_3_2(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + //kernel_init_3_2(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); advection_init_3_2(size, line_c); + ind_m = (nb[0]*nb[1]*nb[2])*dir+get_global_id(0)*nb[1]+get_global_id(1); + stride_m = nb[0]*nb[1]; } } else { if (dir == 0) { - kernel_init_2_0(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + //kernel_init_2_0(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); advection_init_2_0(size, line_c); + ind_m = (nb[0]*nb[1])*dir+get_global_id(0); + stride_m = nb[1]; } else { - kernel_init_2_1(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + //kernel_init_2_1(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); advection_init_2_1(size, line_c); + ind_m = (nb[0]*nb[1])*dir+get_global_id(0); + stride_m = nb[0]; } } - // Particles scalar initialisation - for(p=0;p<line_nb_pts;p++) - gvelo_buffer_loc[p] = gscal[ind_line_scal + p*stride_along_dir_scal]; // Read : 1float - for(p=0;p<line_nb_pts;p++) - pscal[ind_line_scal + p*stride_along_dir_scal] = gvelo_buffer_loc[p]; // Write : 1float - //Get velocity local copy - i = ind_line + dir*stride_vec; for(p=0;p<line_nb_pts;p++) - gvelo_buffer_loc[p] = gvelo[i + p*stride_along_dir]; // Read: 1float + gvelo_buffer_loc[p] = gvelo[ind_m + p*stride_m]; // Advection algorithm along line for(p=0;p<line_nb_pts;p++) @@ -227,12 +229,7 @@ __kernel void advection(__global const float* gvelo, ppos_c = fma(dt, p_velo, p*dx_dir); // 4flop OpenCL : fma(a,b,c) = c+a*b // Apply boundary conditions line_c[dir] = fmod_rtn(ppos_c, length_v[dir]); //3flop - // Write result in buffer - ppos[ind_line + p*stride_along_dir] = min_v[0] + line_c[0]; // Write : 1float - ppos[ind_line + stride_vec + p*stride_along_dir] = min_v[1] + line_c[1]; // Write : 1float -#if DIM == 3 - ppos[ind_line + 2*stride_vec + p*stride_along_dir] = min_v[2] + line_c[2]; // Write : 1float -#endif + ppos[ind_m + p*stride_m] = min_v[dir] + line_c[dir]; } } @@ -263,6 +260,7 @@ __kernel void remeshing(__global const float* part, __private float pscal_c; // Particle scalar along line __private float weights[6]; // Remeshing weights __private float gscal_buffer_loc[MAX_LINE_POINTS]; // Local buffer to store gscal for the current line + __private uint ind_m, stride_m; // Compute strides and index. Depend on array order (C order here) stride_vec = 1; // Les composantes des vecteurs sont à la suite dans les buffers dx_dir = size[dir]; @@ -270,23 +268,33 @@ __kernel void remeshing(__global const float* part, if (dir == 0) { kernel_init_3_0(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + ind_m = (nb[0]*nb[1]*nb[2])*dir+get_global_id(0)*nb[2]+get_global_id(1); + stride_m = nb[1]*nb[2]; } else if (dir == 1) { kernel_init_3_1(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + ind_m = (nb[0]*nb[1]*nb[2])*dir+get_global_id(0)*nb[2]+get_global_id(1); + stride_m = nb[0]*nb[2]; } else { kernel_init_3_2(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + ind_m = (nb[0]*nb[1]*nb[2])*dir+get_global_id(0)*nb[1]+get_global_id(1); + stride_m = nb[0]*nb[1]; }} else{ if (dir == 0) { kernel_init_2_0(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + ind_m = (nb[0]*nb[1])*dir+get_global_id(0); + stride_m = nb[1]; } else { kernel_init_2_1(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + ind_m = (nb[0]*nb[1])*dir+get_global_id(0); + stride_m = nb[0]; } } @@ -296,7 +304,7 @@ __kernel void remeshing(__global const float* part, for(p=0;p<line_nb_pts;p++) { // read particle position - ppos_c = part[ind_line + p*stride_along_dir + dir*stride_vec]; // Read : 1float (4bytes) + ppos_c = part[ind_m + p*stride_m]; // Read : 1float (4bytes) // read particle scalar pscal_c = pscal[ind_line_scal + p*stride_along_dir_scal]; // Read : 1float // Compute nearest left grid point @@ -328,28 +336,123 @@ __kernel void remeshing(__global const float* part, gscal[i + p*stride_along_dir_scal] = gscal_buffer_loc[p]; // Write : 1float } + + +// volume computing +__kernel void volume(__global const float* pscal, + __global float* volume, + __private const float level, + __private const float particle_volume, + __private const int dir, + __private const uint line_nb_pts, + __private const float4 min_v, + __private const float4 max_v, + __private const uint4 nb, + __private const float4 size + ) +{ + __private uint p; // Particle line loop index + __private uint stride_vec; // Buffer stride between two components of the same particle + __private uint stride_along_dir, stride_along_dir_scal; // Buffer stride between two particle of the same line + __private uint ind_line, ind_line_scal; // Line begin buffer index + __private float dx_dir; // Space step along line + __private float pscal_c; // Particle scalar along line + // Compute strides and index. Depend on array order (C order here) + stride_vec = 1; // Les composantes des vecteurs sont à la suite dans les buffers + dx_dir = size[dir]; + + if (dir == 0) + { + kernel_init_2_0(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + } + else + { + kernel_init_2_1(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + } + + volume[ind_line_scal]= 0.0f; + for(p=0;p<line_nb_pts;p++) + { + // read particle scalar + pscal_c = pscal[ind_line_scal + p*stride_along_dir_scal]; // Read : 1float + if (pscal_c > level) + volume[ind_line_scal] += particle_volume; + } +} + + + + // Colorize scalar __kernel void colorize(__global const float* pscal, - __global float* pcolor -) + __global float* pcolor, + __global float* points, + __private const uint dir, + __private const uint line_nb_pts, + __private const float4 min, + __private const uint4 nb, + __private const float4 size + ) { - __private uint ind; - __private float color; -#if DIM==3 - __private int ix, iy, iz; - ix = get_global_id(0); - iz = get_global_id(1); - iy = get_global_id(2); - ind = iz + iy*get_global_size(2) + ix*get_global_size(2)*get_global_size(1); + __private uint p; + __private float color, dx_dir; + __private size_t stride_vec; + __private uint stride_along_dir, stride_along_dir_scal; + __private uint ind_line, ind_line_scal; + __private float line_c[DIM]; + stride_vec = 1; // Les composantes des vecteurs sont à la suite dans les buffers + dx_dir = size[dir]; + if(DIM == 3) + { + if (dir == 0) + { + kernel_init_3_0(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + advection_init_3_0(size, line_c); + } + else if (dir == 1) + { + kernel_init_3_1(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + advection_init_3_1(size, line_c); + } + else + { + kernel_init_3_2(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + advection_init_3_2(size, line_c); + } + } + else + { + if (dir == 0) + { + kernel_init_2_0(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + advection_init_2_0(size, line_c); + } + else + { + kernel_init_2_1(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal); + advection_init_2_1(size, line_c); + } + } + + for(p=0;p<line_nb_pts;p++) + { + // Write result in buffer + line_c[dir] = p*dx_dir; + points[ind_line + p*stride_along_dir] = min.s0 + line_c[0]; + points[ind_line + stride_vec + p*stride_along_dir] = min.s1 + line_c[1]; +#if DIM == 3 + points[ind_line + 2*stride_vec + p*stride_along_dir] = min.s2 + line_c[2]; #else - __private int ix, iy; - ix = get_global_id(0); - iy = get_global_id(1); - ind = iy + ix*get_global_size(1); + points[ind_line + 2*stride_vec + p*stride_along_dir] = 0.0f; #endif - color = pscal[ind]; - pcolor[4*ind + 0] = color; //Red - pcolor[4*ind + 1] = 0.0f; //Green - pcolor[4*ind + 2] = 0.0f; //Blue - pcolor[4*ind + 3] = 1.0f; //Alpha + color = pscal[ind_line_scal + p*stride_along_dir_scal]; + if (color > 0.5) + pcolor[4*ind_line_scal + 4*p*stride_along_dir_scal] = 1.0f; //Red + else + pcolor[4*ind_line_scal + 4*p*stride_along_dir_scal] = 0.0f; //Red + pcolor[4*ind_line_scal + 4*p*stride_along_dir_scal + 1] = 0.0f; //Green + pcolor[4*ind_line_scal + 4*p*stride_along_dir_scal + 2] = 0.0f; //Blue + pcolor[4*ind_line_scal + 4*p*stride_along_dir_scal + 3] = 1.0f; //Alpha + } + } diff --git a/parmepy/new_ParMePy/Variable/AnalyticalVariable.py b/parmepy/new_ParMePy/Variable/AnalyticalVariable.py index e98c209b0..6bf5c69cb 100644 --- a/parmepy/new_ParMePy/Variable/AnalyticalVariable.py +++ b/parmepy/new_ParMePy/Variable/AnalyticalVariable.py @@ -14,7 +14,7 @@ class AnalyticalVariable(ContinuousVariable): ContinuousVariable.ContinuousVariable specialization. """ - def __init__(self, dimension, domain, formula, scalar=False): + def __init__(self, dimension, domain, formula=None, scalar=False, name=""): """ Constructor. Create an AnalyticalVariable from a formula. @@ -28,6 +28,7 @@ class AnalyticalVariable(ContinuousVariable): self.formula = formula ## Is scalar variable self.scalar = scalar + self.name = name def discretize(self, d_spec=None): """ @@ -37,7 +38,7 @@ class AnalyticalVariable(ContinuousVariable): @param d_spec : discretization specifications, not used. """ if self.discreteVariable is None: - self.discreteVariable = DiscreteVariable(domain=self.domain.discreteDomain, dimension=self.dimension, initialValue=self.formula, scalar=self.scalar, spec=d_spec) + self.discreteVariable = DiscreteVariable(domain=self.domain.discreteDomain, dimension=self.dimension, initialValue=self.formula, scalar=self.scalar, spec=d_spec, name=self.name) def value(self, pos): """ diff --git a/parmepy/new_ParMePy/Variable/ContinuousVariable.py b/parmepy/new_ParMePy/Variable/ContinuousVariable.py index 7f9050ad8..5420558ca 100644 --- a/parmepy/new_ParMePy/Variable/ContinuousVariable.py +++ b/parmepy/new_ParMePy/Variable/ContinuousVariable.py @@ -25,6 +25,7 @@ class ContinuousVariable: self.dimension = dimension ## Discrete variable self.discreteVariable = None + self.name = "" def discretize(self, spec=None): """ diff --git a/parmepy/new_ParMePy/Variable/DiscreteVariable.py b/parmepy/new_ParMePy/Variable/DiscreteVariable.py index 34b6eb0b9..6a439ef82 100644 --- a/parmepy/new_ParMePy/Variable/DiscreteVariable.py +++ b/parmepy/new_ParMePy/Variable/DiscreteVariable.py @@ -11,7 +11,7 @@ class DiscreteVariable: Discrete variables abstract description """ - def __init__(self, domain=None, dimension=None, initialValue=None, integer=False, scalar=False, spec=None): + def __init__(self, domain=None, dimension=None, initialValue=None, integer=False, scalar=False, spec=None, name=""): """ Constructor. Create a DiscreteVariable from a given ContinuousVariable.ContinuousVariable or a domain. @@ -27,6 +27,7 @@ class DiscreteVariable: self.dimension = dimension ## Application domain of the variable. self.domain = domain + self.name = name ### ------------------ self.shape = list(self.domain.elementNumber) if spec is None: @@ -36,24 +37,31 @@ class DiscreteVariable: self.values = np.zeros(self.shape, dtype=dtype_integer) else: self.values = np.zeros(self.shape, dtype=dtype_real) + else: + self.values = np.zeros(spec, dtype=dtype_real) self.initialValue = initialValue + self.contains_data = False self.gpu_mem_object = None def init(self): - if self.domain.dimension == 1: - for x in xrange(self.shape[0]): - self.values[x] = self.initialValue([self.domain.axes[0][x]]) - elif self.domain.dimension == 2: - for x in xrange(self.shape[0]): - for y in xrange(self.shape[1]): - self.values[x, y] = self.initialValue([self.domain.axes[0][x], self.domain.axes[1][y]]) - elif self.domain.dimension == 3: - for x in xrange(self.shape[0]): - for y in xrange(self.shape[1]): - for z in xrange(self.shape[2]): - self.values[x, y, z] = self.initialValue([self.domain.axes[0][x], self.domain.axes[1][y], self.domain.axes[2][z]]) - else: - raise ValueError("Creating DiscreteVariable with incorrect domain dimension.") + if not self.initialValue is None: + if self.domain.dimension == 1: + for x in xrange(self.shape[0]): + self.values[x] = self.initialValue([self.domain.axes[0][x]]) + self.contains_data = True + elif self.domain.dimension == 2: + for x in xrange(self.shape[0]): + for y in xrange(self.shape[1]): + self.values[x, y] = self.initialValue([self.domain.axes[0][x], self.domain.axes[1][y]]) + self.contains_data = True + elif self.domain.dimension == 3: + for x in xrange(self.shape[0]): + for y in xrange(self.shape[1]): + for z in xrange(self.shape[2]): + self.values[x, y, z] = self.initialValue([self.domain.axes[0][x], self.domain.axes[1][y], self.domain.axes[2][z]]) + self.contains_data = True + else: + raise ValueError("Creating DiscreteVariable with incorrect domain dimension.") def __str__(self): """ -- GitLab