From ff2e61f3235bab01c782afd23781f01231561d0e Mon Sep 17 00:00:00 2001 From: Jean-Matthieu Etancelin <jean-matthieu.etancelin@imag.fr> Date: Wed, 29 Feb 2012 16:59:41 +0000 Subject: [PATCH] =?UTF-8?q?Premi=C3=A8re=20version=20GPU=20non=20valid?= =?UTF-8?q?=C3=A9e.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- parmepy/examples/mainJM.py | 24 +-- parmepy/new_ParMePy/Domain/CartesianGrid.py | 4 +- parmepy/new_ParMePy/Operator/AdvectionDOp.py | 27 ++- parmepy/new_ParMePy/Operator/RemeshingDOp.py | 39 +++- .../new_ParMePy/Utils/GPUParticularSolver.py | 192 ++++++++++++++++++ parmepy/new_ParMePy/Utils/Printer.py | 41 ++-- parmepy/new_ParMePy/Utils/RK2.py | 2 + .../new_ParMePy/Variable/DiscreteVariable.py | 1 + 8 files changed, 290 insertions(+), 40 deletions(-) create mode 100644 parmepy/new_ParMePy/Utils/GPUParticularSolver.py diff --git a/parmepy/examples/mainJM.py b/parmepy/examples/mainJM.py index 8a688032f..fc4d915fe 100644 --- a/parmepy/examples/mainJM.py +++ b/parmepy/examples/mainJM.py @@ -1,17 +1,17 @@ # -*- coding: utf-8 -*- import time -from src.Problem.ContinuousTransportProblem import * -from src.Domain.Box import * -from src.Variable.AnalyticalVariable import * -from src.Operator.Advection import * -from src.Utils.ParticularSolver import * -#from src.Utils.ForwardEuler import * -from src.Utils.RK2 import * -#from src.Utils.Lambda2 import * -from src.Utils.M6Prime import * -from src.Utils.Linear import * -from src.Utils.Printer import * +from new_ParMePy.Problem.ContinuousTransportProblem import * +from new_ParMePy.Domain.Box import * +from new_ParMePy.Variable.AnalyticalVariable import * +from new_ParMePy.Operator.Advection import * +from new_ParMePy.Utils.ParticularSolver import * +#from new_ParMePy.Utils.ForwardEuler import * +from new_ParMePy.Utils.RK2 import * +#from new_ParMePy.Utils.Lambda2 import * +from new_ParMePy.Utils.M6Prime import * +from new_ParMePy.Utils.Linear import * +from new_ParMePy.Utils.Printer import * def vitesse(x): @@ -53,7 +53,7 @@ def run(): InterpolationMethod = Linear(grid=box.discreteDomain, gvalues=velo.discreteVariable) RemeshingMethod = M6Prime(grid=box.discreteDomain, gvalues=scal.discreteVariable) ODESolver = RK2(InterpolationMethod, box.discreteDomain.applyConditions) - solver = ParticularSolver(ODESolver, InterpolationMethod, RemeshingMethod) + solver = GPUParticularSolver(ODESolver, InterpolationMethod, RemeshingMethod) cTspPb.setSolver(solver) #cTspPb.setPrinter(Printer(frequency=outputModulo, outputPrefix=outputFilePrefix, problem=cTspPb)) c0 = time.clock() diff --git a/parmepy/new_ParMePy/Domain/CartesianGrid.py b/parmepy/new_ParMePy/Domain/CartesianGrid.py index 31a4e04b5..6f9140431 100644 --- a/parmepy/new_ParMePy/Domain/CartesianGrid.py +++ b/parmepy/new_ParMePy/Domain/CartesianGrid.py @@ -27,7 +27,7 @@ class CartesianGrid(DiscreteDomain): DiscreteDomain.__init__(self) ## Grid points self.points = None - self.elementNumber = np.array(spec, dtype=int) + self.elementNumber = spec self.elementSize = box.length / self.elementNumber ## Function to create all Points depending on Box.Box dimension #self.pointsCreating = None @@ -55,7 +55,7 @@ class CartesianGrid(DiscreteDomain): self.indices = np.zeros(self.elementNumber, dtype=tuple) for p in self._iter(self.elementNumber): self.indices[tuple(p)] = tuple(p) - self.points = np.zeros(shape) + self.points = np.zeros(shape, dtype=np.float32) for ind in self.explore(): self.points[ind] = self.min + ind * self.elementSize diff --git a/parmepy/new_ParMePy/Operator/AdvectionDOp.py b/parmepy/new_ParMePy/Operator/AdvectionDOp.py index 35ef0f8a2..38eab3a0f 100644 --- a/parmepy/new_ParMePy/Operator/AdvectionDOp.py +++ b/parmepy/new_ParMePy/Operator/AdvectionDOp.py @@ -5,6 +5,7 @@ Operator representation """ from DiscreteOperator import * import numpy as np +import pyopencl as cl providedClass = "AdvectionDOp" @@ -29,6 +30,7 @@ class AdvectionDOp(DiscreteOperator): ## Transported scalar. self.scalar = advec.scalar.discreteVariable self.addVariable([self.velocity, self.scalar]) + self.gpu_kernel = None def setResultVariable(self, respos, resscal): """ @@ -53,10 +55,27 @@ class AdvectionDOp(DiscreteOperator): #for i in self.velocity.domain.explore(): # self.resultPosition.values[i] = self.velocity.domain.applyConditions(self.numMethod.integrate(self.velocity.domain.points[i], self.velocity.values[i], t, dt, splittingDirection)) #self.resultScalar.values[i] = self.scalar.values[i] - ## ------------------- NumPy Optimisation - self.resultPosition.values = self.numMethod.integrate(self.velocity.domain.points, self.velocity.values, t, dt, splittingDirection) - self.resultScalar.values[:] = self.scalar.values[:] - ## ------------------- + if self.gpu_kernel is None: + ## ------------------- NumPy Optimisation + self.resultPosition.values = self.numMethod.integrate(self.velocity.domain.points, self.velocity.values, t, dt, splittingDirection) + self.resultScalar.values[:] = self.scalar.values[:] + ## ------------------- + else: + self.gpu_queue.finish() + self.gpu_kernel(self.gpu_queue, self.gpu_shape, None, + self.velocity.domain.gpu_mem_object, + self.velocity.gpu_mem_object, + self.resultPosition.gpu_mem_object, + np.float32(t), np.float32(dt), np.int32(splittingDirection), + np.concatenate((self.velocity.domain.min, [1.])).astype(np.float32), + np.concatenate((self.velocity.domain.max, [1.])).astype(np.float32)) + evt = cl.enqueue_copy(self.gpu_queue, self.resultScalar.gpu_mem_object, self.scalar.gpu_mem_object, + src_origin=tuple([0 for x in self.gpu_shape]), + dest_origin=tuple([0 for x in self.gpu_shape]), + region=self.gpu_shape) + #self.scalar.values.fill(0.) + #cl.enqueue_copy(self.gpu_queue, self.scalar.gpu_mem_object, self.scalar.values, + #origin=tuple([0 for x in self.gpu_shape]), region=self.gpu_shape, wait_for=[evt]) def __str__(self): """ToString method""" diff --git a/parmepy/new_ParMePy/Operator/RemeshingDOp.py b/parmepy/new_ParMePy/Operator/RemeshingDOp.py index 26493ffa3..894352beb 100644 --- a/parmepy/new_ParMePy/Operator/RemeshingDOp.py +++ b/parmepy/new_ParMePy/Operator/RemeshingDOp.py @@ -5,6 +5,8 @@ Operator representation """ from DiscreteOperator import * #from ..Utils.Lambda2 import * +import numpy as np +import pyopencl as cl providedClass = "RemeshingDOp" @@ -40,6 +42,7 @@ class RemeshingDOp(DiscreteOperator): self.resultScalar = resscal self.addVariable([self.ppos, self.pscal, self.pbloc, self.ptag, self.resultScalar]) self.numMethod = method + self.gpu_kernel = None def setResultVariable(self, resscal): """ @@ -58,9 +61,6 @@ class RemeshingDOp(DiscreteOperator): #reset scalar #for i in self.resultScalar.domain.explore(): # self.resultScalar.values[i] = 0. - ## ------------------- NumPy Optimisation - self.resultScalar.values[:] = 0. - ## ------------------- #### Remaillage tag: # if isinstance(self.numMethod, Lambda2): # for i in self.resultScalar.domain.explore(): @@ -83,9 +83,36 @@ class RemeshingDOp(DiscreteOperator): # else: #for i in self.resultScalar.domain.explore(): # self.numMethod.remesh(self.ppos.values[i], self.pscal.values[i], splittingDirection) - ## ------------------- NumPy Optimisation - self.numMethod.remesh(self.ppos.values, self.pscal.values, splittingDirection) - ## ------------------- + if self.gpu_kernel is None: + ## ------------------- NumPy Optimisation + self.resultScalar.values[:] = 0. + self.numMethod.remesh(self.ppos.values, self.pscal.values, splittingDirection) + ## ------------------- + else: + cl.enqueue_write_buffer(self.gpu_queue, self.gpu_buffer, self.gpu_buffer_values) + self.gpu_queue.finish() + #print "avant" + #print self.gpu_buffer_values[10,10,:8] + #print self.gpu_buffer_values[0,15,6] + #print "fin -avant" + self.gpu_kernel[0](self.gpu_queue, self.gpu_shape, None, + self.ppos.gpu_mem_object, self.pscal.gpu_mem_object, self.resultScalar.gpu_mem_object, self.gpu_buffer, + np.float32(t), np.float32(dt), np.int32(splittingDirection), + np.concatenate((self.resultScalar.domain.min, [1.])).astype(np.float32), + np.concatenate((self.resultScalar.domain.max, [1.])).astype(np.float32)) + cl.enqueue_copy(self.gpu_queue, self.gpu_buffer_values, self.gpu_buffer) + #print "Apres" + #print self.gpu_buffer_values[0,0,0,0] + #print self.gpu_buffer_values[2,0,0,0] + #print self.gpu_buffer_values[10,10,10,0] + #print self.gpu_buffer_values[10,10,:8] + #print self.gpu_buffer_values[0,15,6] + #print "fin apres" + self.gpu_kernel[1](self.gpu_queue, self.gpu_shape, None, + self.resultScalar.gpu_mem_object, self.gpu_buffer, + np.float32(t), np.float32(dt), np.int32(splittingDirection), + np.concatenate((self.resultScalar.domain.min, [1.])).astype(np.float32), + np.concatenate((self.resultScalar.domain.max, [1.])).astype(np.float32)) def __str__(self): """ToString method""" diff --git a/parmepy/new_ParMePy/Utils/GPUParticularSolver.py b/parmepy/new_ParMePy/Utils/GPUParticularSolver.py new file mode 100644 index 000000000..fa06f3440 --- /dev/null +++ b/parmepy/new_ParMePy/Utils/GPUParticularSolver.py @@ -0,0 +1,192 @@ +# -*- coding: utf-8 -*- +""" +@package Utils +GPU Particular solver description. +""" +import pyopencl as cl +from ParticularSolver import * + + +providedClass = "GPUParticularSolver" + + +class GPUParticularSolver(ParticularSolver): + """ + GPU Particular solver description. + Link with differents numericals methods used. Prepare GPU side (memory, kernels, ...) + """ + def __init__(self, + ODESolver, + InterpolationMethod, + RemeshingMethod, + splittingMethod='strang', + platform_name=None, device_name=None): + """ + Constructor. + Create a solver description. + """ + ParticularSolver.__init__(self, ODESolver, InterpolationMethod, RemeshingMethod, splittingMethod) + #Get available platforms. + platforms = cl.get_platforms() + self.platform = None + if len(platforms) == 1: + self.platform = platforms[0] + if not platform_name is None and self.platform.name != platform_name: + print "Warning : specified platform name (", platform_name, " ) differs from available platform :", self.platform.name + elif not platform_name is None: + for plt in platforms: + if plt.name == platform_name: + self.platform = plt + else: + raise ValueError("PyOpenCL platform choice : specified platform name is unavailable or multiples choices.") + #Get available GPU devices. + devices = self.platform.get_devices(cl.device_type.GPU) + if len(devices) == 1: + self.device = devices[0] + if not device_name is None and self.device.name != device_name: + print "Warning : specified platform name (", device_name, " ) differs from available platform :", self.device.name + elif not device_name is None: + for d in devices: + if d.name == device_name: + self.device = d + else: + raise ValueError("PyOpenCL device choice : specified device name is unavailable or multiples choices.") + #Creates GPU Context + self.ctx = cl.Context([self.device]) + #Create CommandQueue on the GPU Context + self.queue = cl.CommandQueue(self.ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) + + def initialize(self, discreteProblem): + """ + Solver initialisation for a given DiscreteProblem.DiscreteProblem. + + @param discreteProblem : Problem to initialize. + """ + # Initialization CPU side. + self.grid = discreteProblem.domains[0] + self.gvelo = discreteProblem.advecOp.velocity + self.gscal = discreteProblem.advecOp.scalar + self.parts = ParticleField(self.grid) + self.ppos = DiscreteVariable(domain=self.parts, dimension=self.gvelo.dimension) + self.parts.setPositions(self.ppos) + #self.pvelo = DiscreteVariable(domain=self.parts, dimension=self.gvelo.dimension) + self.pscal = DiscreteVariable(domain=self.parts, dimension=self.gscal.dimension, initialValue=lambda x: 0., scalar=True) + # Initialization GPU side. + # Images2D/Images3D/Buffers creation. + dim = self.grid.dimension + shape_dim = list(self.grid.elementNumber) + img_format = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.FLOAT) + shape_img = tuple(shape_dim + [4]) + self.gpu_shape = tuple(self.grid.elementNumber) + + padded_points = np.zeros(shape_img, dtype=np.float32) + padded_points[..., 0:dim] = self.grid.points[:] + self.grid.point = padded_points + self.grid.gpu_mem_object = cl.Image(self.ctx, + cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, img_format, + shape=self.gpu_shape, hostbuf=self.grid.point) + + padded_velocity = np.zeros(shape_img, dtype=np.float32) + padded_velocity[..., 0:dim] = self.gvelo.values[:] + self.gvelo.values = padded_velocity + self.gvelo.gpu_mem_object = cl.Image(self.ctx, + cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, img_format, + shape=self.gpu_shape, hostbuf=self.gvelo.values) + + scalar_nbytes = self.gscal.values.nbytes + padded_scalar = np.zeros(shape_img, dtype=np.float32) + padded_scalar[..., 0] = self.gscal.values[:] + self.gscal.values = padded_scalar + self.gscal.gpu_mem_object = cl.Image(self.ctx, + cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, img_format, + shape=self.gpu_shape, hostbuf=self.gscal.values) + + padded_ppos = np.zeros(shape_img, dtype=np.float32) + padded_ppos[..., 0:dim] = self.ppos.values[:] + self.ppos.values = padded_ppos + self.ppos.gpu_mem_object = cl.Image(self.ctx, + cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, img_format, + shape=self.gpu_shape, hostbuf=self.ppos.values) + + # padded_pvelo = np.zeros(shape_img, dtype=np.float32) + # padded_pvelo[..., 0:dim] = self.pvelo.values[:] + # self.pvelo.values = padded_pvelo + # self.pvelo.gpu_mem_object = cl.Image(self.ctx, + # cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, img_format, + # shape=self.gpu_shape, hostbuf=self.pvelo.values) + + padded_pscal = np.zeros(shape_img, dtype=np.float32) + padded_pscal[..., 0] = self.pscal.values[:] + self.pscal.values = padded_pscal + self.pscal.gpu_mem_object = cl.Image(self.ctx, + cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, img_format, + shape=self.gpu_shape, hostbuf=self.pscal.values) + + # Remeshing : particles positions, particles scalar, particles tag, particles bloc -> grid scalar + remesh = RemeshingDOp(partPositions=self.ppos, + partScalar=self.pscal, + partBloc=self.pscal, + partTag=self.pscal, + resscal=discreteProblem.advecOp.scalar, + method=self.RemeshingMethod) + discreteProblem.addOperator([remesh]) + discreteProblem.advecOp.setResultVariable(respos=self.ppos, resscal=self.pscal) + discreteProblem.advecOp.setMethod(self.ODESolver) + + # Kernels creation/compilation + f = open("gpu_src.cl", 'r') + self.gpu_src = "".join(f.readlines()) + self.prg = cl.Program(self.ctx, self.gpu_src).build() + discreteProblem.advecOp.gpu_queue = self.queue + discreteProblem.advecOp.gpu_shape = self.gpu_shape + discreteProblem.advecOp.gpu_kernel = self.prg.integrate + remesh.gpu_queue = self.queue + remesh.gpu_shape = self.gpu_shape + remesh.gpu_kernel = (self.prg.remeshing, self.prg.remeshing_reduce) + remesh.gpu_buffer = cl.Buffer(self.ctx, cl.mem_flags.READ_WRITE, size=scalar_nbytes * 8) + buffer_shape = list(self.gpu_shape) + buffer_shape.append(8) + remesh.gpu_buffer_values = np.zeros(tuple(buffer_shape), dtype=np.float32) + + # Splitting Step + splitting = [] + if self.splittingMethod == 'strang': + [splitting.append((i, 0.5)) for i in xrange(dim - 1)] + splitting.append((dim - 1, 1.)) + [splitting.append((dim - 2 - i, 0.5)) for i in xrange(dim - 1)] + else: + raise ValueError("Unknown splitting method : ", self.splittingMethod) + discreteProblem.splittingStep = splitting + + def collect_data(self): + o = tuple([0 for x in self.gpu_shape]) + self.queue.finish() + cl.enqueue_copy(self.queue, self.grid.point, self.grid.gpu_mem_object, origin=o, region=self.gpu_shape) + cl.enqueue_copy(self.queue, self.gvelo.values, self.gvelo.gpu_mem_object, origin=o, region=self.gpu_shape) + cl.enqueue_copy(self.queue, self.gscal.values, self.gscal.gpu_mem_object, origin=o, region=self.gpu_shape) + cl.enqueue_copy(self.queue, self.ppos.values, self.ppos.gpu_mem_object, origin=o, region=self.gpu_shape) + #cl.enqueue_copy(self.queue, self.pvelo.values, self.pvelo.gpu_mem_object, origin=o, region=self.gpu_shape) + cl.enqueue_copy(self.queue, self.pscal.values, self.pscal.gpu_mem_object, origin=o, region=self.gpu_shape) + + def terminate(self): + self.queue.finish() + + def __str__(self): + """ToString method""" + s = "GPU Particular solver " + s += "\n - ODESolver : " + str(self.ODESolver) + s += "\n - Interpolation : " + str(self.InterpolationMethod) + s += "\n - Remeshing : " + str(self.RemeshingMethod) + s += "\n - GPU informations : " + s += "\n - platform : " + self.platform.name + s += " OpenCL version: " + self.platform.version + s += "\n - device : " + self.device.name + s += " Global Memory available : " + str(self.device.global_mem_size // 1024 // 1024) + " MB ( " + str(self.device.global_mem_size) + " B)" + s += "\n Space index max : " + str(self.device.max_work_item_sizes) + s += " Max workgroup size : " + str(self.device.max_work_group_size) + return s + +if __name__ == "__main__": + print __doc__ + print "- Provided class : " + providedClass + print eval(providedClass).__doc__ diff --git a/parmepy/new_ParMePy/Utils/Printer.py b/parmepy/new_ParMePy/Utils/Printer.py index 1c4a2b797..f401f9235 100644 --- a/parmepy/new_ParMePy/Utils/Printer.py +++ b/parmepy/new_ParMePy/Utils/Printer.py @@ -3,10 +3,12 @@ @package Utils Results Printer """ +from new_ParMePy.Utils.GPUParticularSolver import * providedClass = "Printer" + class Printer(): """ Results printer. @@ -30,13 +32,18 @@ class Printer(): else: self.printStep = self._passStep self.grid = self.problem.discreteProblem.advecOp.velocity.domain - self.gvelo = self.problem.discreteProblem.advecOp.velocity - self.gscal = self.problem.discreteProblem.advecOp.scalar - self.ppos = self.problem.discreteProblem.solver.ppos - self.pvelo = self.problem.discreteProblem.solver.pvelo - self.pscal = self.problem.discreteProblem.solver.pscal - self.pbloc = self.problem.discreteProblem.solver.pbloc - self.ptag = self.problem.discreteProblem.solver.ptag + self.gvelo = self.problem.discreteProblem.advecOp.velocity.values + self.ppos = self.problem.discreteProblem.solver.ppos.values + if isinstance(self.problem.discreteProblem.solver, GPUParticularSolver): + self.gscal = self.problem.discreteProblem.advecOp.scalar.values[...,0] + self.pscal = self.problem.discreteProblem.solver.pscal.values[...,0] + else: + self.pscal = self.problem.discreteProblem.solver.pscal.values + self.gscal = self.problem.discreteProblem.advecOp.scalar.values + + #self.pvelo = self.problem.discreteProblem.solver.pvelo + # self.pbloc = self.problem.discreteProblem.solver.pbloc + # self.ptag = self.problem.discreteProblem.solver.ptag self.printStep() def _passStep(self): @@ -48,22 +55,24 @@ class Printer(): dim = len(self.grid.elementNumber) f.write("# iteration : {0}, time = {1}".format(self.ite, self.problem.discreteProblem.t)) f.write("# gid({0}) gpos({1}) gvelo({2}) gscal(1) ppos({3}) pvelo({4}) pscal(1) pbloc(1) ptag(1)\n".format(dim, dim, dim, dim, dim)) + if isinstance(self.problem.discreteProblem.solver, GPUParticularSolver): + self.problem.discreteProblem.solver.collect_data() for line in self.grid.explore(0): for i in line: for d in xrange(dim): f.write(" {0:6d}".format(i[d])) - for xx in self.grid[i]: - f.write(" {0:8.5}".format(xx)) for d in xrange(dim): - f.write(" {0:8.5}".format(self.gvelo.values[i][d])) - f.write(" {0:8.5}".format(self.gscal.values[i])) + f.write(" {:8.12}".format(self.grid[i][d])) for d in xrange(dim): - f.write(" {0:8.5}".format(self.ppos.values[i][d])) + f.write(" {:8.12}".format(self.gvelo[i][d])) + f.write(" {:8.12}".format(self.gscal[i])) for d in xrange(dim): - f.write(" {0:8.5}".format(self.pvelo.values[i][d])) - f.write(" {0:8.5}".format(self.pscal.values[i])) - f.write(" {0:6d}".format(self.pbloc.values[i])) - f.write(" {0}".format(self.ptag.values[i])) + f.write(" {:8.12}".format(self.ppos[i][d])) + # for d in xrange(dim): + # f.write(" {:8.12}".format(self.pvelo.values[i][d])) + f.write(" {:8.12}".format(self.pscal[i])) + # f.write(" {0:6d}".format(self.pbloc.values[i])) + # f.write(" {0}".format(self.ptag.values[i])) f.write("\n") f.write("\n") f.close() diff --git a/parmepy/new_ParMePy/Utils/RK2.py b/parmepy/new_ParMePy/Utils/RK2.py index 43f42aec4..9b46d6ca8 100644 --- a/parmepy/new_ParMePy/Utils/RK2.py +++ b/parmepy/new_ParMePy/Utils/RK2.py @@ -38,6 +38,8 @@ class RK2(ODESolver): # self.integrate = [self._integrate, self._integrate_X, self._integrate_XY] # else: # raise ValueError("Bad dimension for RK2 Solver.") + self.gpu_kernel = """ + """ def integrate(self, y, fy, t, dt, dir): """ diff --git a/parmepy/new_ParMePy/Variable/DiscreteVariable.py b/parmepy/new_ParMePy/Variable/DiscreteVariable.py index 979ffd349..51e649327 100644 --- a/parmepy/new_ParMePy/Variable/DiscreteVariable.py +++ b/parmepy/new_ParMePy/Variable/DiscreteVariable.py @@ -60,6 +60,7 @@ class DiscreteVariable: self.values = np.zeros(shape) for ind in self.domain.explore(): self.values[ind] = initialValue(self.domain[ind]) + self.gpu_mem_object = None def __str__(self): """ -- GitLab