diff --git a/HySoP/CMakeLists.txt b/HySoP/CMakeLists.txt index 86d7c509a33a435297ce8b320f6ec42edc8c99e4..ce87f558d8f86f634647cac65cbca5cbb43ccf61 100644 --- a/HySoP/CMakeLists.txt +++ b/HySoP/CMakeLists.txt @@ -36,6 +36,7 @@ option(BUILD_SHARED_LIBS "Enable dynamic library build, default = OFF." OFF) option(WITH_LIB_FORTRAN "Generate libparmes from fortran files in src, wrapped into parmepy.f2py module. Default = ON." ON) option(WITH_SCALES "compile/create parmesscales lib and link it with Parmes. Default = ON." ON) option(WITH_FFTW "Link with fftw library (required for some Parmes solvers), default = ON" ON) +option(WITH_GPU "Use of GPU (required for some Parmes solvers), default = ON" ON) option(WITH_MAIN_FORTRAN "Create an executable (test purpose) from fortran sources in src/main, linked with libparmes, default = ON" ON) if(NOT WITH_LIB_FORTRAN) @@ -96,6 +97,11 @@ execute_process( string(STRIP ${${PROJECT_NAME}_INSTALL_DIR} ${PROJECT_NAME}_INSTALL_DIR) set(CMAKE_INSTALL_PREFIX ${${PROJECT_NAME}_INSTALL_DIR}/${PROJECT_NAME}) +# --- OpenCL --- +if(WITH_GPU) + find_python_module(pyopencl REQUIRED) +endif() + # --- MPI --- if(USE_MPI) @@ -203,6 +209,7 @@ if(VERBOSE_MODE) message(STATUS " Project uses MPI : ${USE_MPI}") message(STATUS " Project uses Scales : ${WITH_SCALES}") message(STATUS " Project uses FFTW : ${WITH_FFTW}") + message(STATUS " Project uses GPU : ${WITH_GPU}") message(STATUS " Python packages will be installed in ${${PROJECT_NAME}_INSTALL_DIR}") message(STATUS "====================== ======= ======================") endif() diff --git a/HySoP/hysop/__init__.py.in b/HySoP/hysop/__init__.py.in index 83d03ceae2508b294873f77369c5e73e2664ae0b..0308bed2ed1a09ab218fd7a4f18023195cdf3c4f 100755 --- a/HySoP/hysop/__init__.py.in +++ b/HySoP/hysop/__init__.py.in @@ -17,40 +17,39 @@ if("@USE_MPI@" is "ON"): print "Starting @PACKAGE_NAME@ version " + str(__version__) + ".\n" else: print "Starting @PACKAGE_NAME@ (no mpi) version " + str(__version__) + ".\n" - + version = "1.0.0" #import constants from constants import PERIODIC -from domain import box, grid +from domain import box ## Box-type physical domain Box = box.Box -CartesianGrid = grid.CartesianGrid ## Cartesian grid -## Fields -import fields.discrete -import fields.continuous -import fields.analytical -ScalarField = fields.discrete.ScalarField -VectorField = fields.discrete.VectorField -ContinuousField = fields.continuous.ContinuousField -AnalyticalField = fields.analytical.AnalyticalField +# ## Fields +# import fields.discrete +# import fields.continuous +# import fields.analytical +# ScalarField = fields.discrete.ScalarField +# VectorField = fields.discrete.VectorField +# ContinuousField = fields.continuous.ContinuousField +# AnalyticalField = fields.analytical.AnalyticalField -## ## Problem -import problem.problem -Problem = problem.problem.Problem +# ## ## Problem +# import problem.problem +# Problem = problem.problem.Problem -## ## Solver -import particular_solvers.basic -## #import particular_solvers.gpu -ParticleSolver = particular_solvers.basic.ParticleSolver -## #GPUParticleSolver = particular_solvers.gpu.GPUParticleSolver +# ## ## Solver +# import particular_solvers.basic +# ## #import particular_solvers.gpu +# ParticleSolver = particular_solvers.basic.ParticleSolver +# ## #GPUParticleSolver = particular_solvers.gpu.GPUParticleSolver ## from tools.explore_hardware import explore diff --git a/HySoP/hysop/constants.py b/HySoP/hysop/constants.py index 0d9c87a6ed222eb1f8b7fe63c207ec32beab92dd..25b89690b82e318bcf4177acd6b826196598e76d 100644 --- a/HySoP/hysop/constants.py +++ b/HySoP/hysop/constants.py @@ -8,16 +8,12 @@ Constant parameters required for the parmepy package (internal use). import numpy as np import math import mpi4py.MPI as MPI -#from parmepy.particular_solvers import __path__ as solver_path import os PI = math.pi # Set default type for real and integer numbers PARMES_REAL = np.float64 PARMES_INTEGER = np.uint32 -PARMES_REAL_GPU = np.float32 -PARMES_DOUBLE_GPU = np.float64 -PARMES_INTEGER_GPU = np.uint32 ## default array layout (fortran or C convention) ORDER = 'F' ORDERMPI = MPI.ORDER_F @@ -32,5 +28,4 @@ ZDIR = 2 PERIODIC = 0 ## Directions string S_DIR = ["_X", "_Y", "_Z"] -## GPU deflault sources -#GPU_SRC = os.path.join(solver_path[0], "gpu_src.cl") + diff --git a/HySoP/hysop/domain/box.py b/HySoP/hysop/domain/box.py index 3925942f0861324d772721520f27209e3d2ac085..cb85d61f5b1fba9609a8629a8897f982219d3b33 100644 --- a/HySoP/hysop/domain/box.py +++ b/HySoP/hysop/domain/box.py @@ -2,11 +2,8 @@ Classes for box domains description. """ - -from parmepy.constants import PARMES_REAL, PARMES_INTEGER, PERIODIC from domain import Domain -from grid import CartesianGrid -import numpy as np +from parmepy.constants import np, PARMES_REAL, PARMES_INTEGER, PERIODIC class Box(Domain): @@ -49,36 +46,6 @@ class Box(Domain): self.boundaries = np.zeros((self.dimension), dtype=PARMES_INTEGER) self.boundaries[:] = PERIODIC - def discretize(self, resolution): - """ - Box discretization method. - Creates a CartesianGrid from the Box and a resolution. - - @param resolution : resolution of the discretization. - @return CartesianGrid as discretized Box. - - Use an array for discreteDomain??? i.e to associate several - discretisations for the same domain. - Or maybe this should be a member of the fields?? - - >>> import parmepy as pp - >>> import numpy as np - >>> b = pp.Box() - >>> n = 11 - >>> g = b.discretize(np.asarray([n, n, n])) - >>> (g.origin == b.origin).all() - True - >>> (g.length == b.length).all() - True - >>> (g.step == b.length/(n)).all() - True - >>> b.discreteDomain == g - True - - """ - self.discreteDomain = CartesianGrid(resolution, self) - return self.discreteDomain - def __str__(self): """ Informations display. @@ -89,8 +56,6 @@ class Box(Domain): s += " origin : " + str(self.origin) + ", maxPosition :" \ + str(self.max) + ", lengths :" + str(self.length) + "." - if self.discreteDomain is not None: - s += "Discretised on : \n" + str(self.discreteDomain) return s if __name__ == "__main__": diff --git a/HySoP/hysop/domain/discrete.py b/HySoP/hysop/domain/discrete.py deleted file mode 100644 index cf9443864d8c1335d9ca8f87ad9e6899bb41c694..0000000000000000000000000000000000000000 --- a/HySoP/hysop/domain/discrete.py +++ /dev/null @@ -1,29 +0,0 @@ -"""@package parmepy.domain.discrete - -Classes for physical domains discretization description. -""" -from abc import ABCMeta, abstractmethod - - -class DiscreteDomain: - """ Abstract description of a discretized physical domain.""" - - __metaclass__ = ABCMeta - - @abstractmethod - def __init__(self, domain=None): - """ - Create a DiscreteDomain with a given discretization. - @param domain : Domain to discretize - """ - ## Domain to discretize - self.domain = domain - if(domain is not None): - ## Domain dimension - self.dimension = domain.dimension - - -if __name__ == "__main__": - print __doc__ - print "- Provided class : DiscreteDomain" - print DiscreteDomain.__doc__ diff --git a/HySoP/hysop/domain/domain.py b/HySoP/hysop/domain/domain.py index 73b262ff334c0cc27f5bdc9364ea2467b18b123d..e02c4c438616b00ef26fa520be904fdb8627e8a3 100644 --- a/HySoP/hysop/domain/domain.py +++ b/HySoP/hysop/domain/domain.py @@ -17,14 +17,7 @@ class Domain: """ ## Domain dimension. self.dimension = dimension - ## Domain discretization. - self.discreteDomain = None - @abstractmethod - def discretize(self, resolution=None): - """ - @param resolution : discretization specifications. - """ if __name__ == "__main__": print __doc__ diff --git a/HySoP/hysop/domain/grid.py b/HySoP/hysop/domain/grid.py deleted file mode 100644 index c28cf34340cca130c3cc4fbbb487dd5519b56b10..0000000000000000000000000000000000000000 --- a/HySoP/hysop/domain/grid.py +++ /dev/null @@ -1,53 +0,0 @@ -"""@package parmepy.domain.grid - -Cartesian grid definition. -""" -from discrete import DiscreteDomain - - -class CartesianGrid(DiscreteDomain): - """ Discrete box representation as a cartesian grid.""" - - def __init__(self, resolution, domain): - """ - Creates a CartesianGrid. - - @param resolution : resolution used - @param domain : Domain to discretize - """ - DiscreteDomain.__init__(self, domain) - assert(self.dimension == len(resolution)) - ## lowest point of the grid - self.origin = domain.origin - ## size of the grid in each direction - self.length = domain.length - ## number of points in each direction - self.resolution = resolution - ## space step size - self.step = self.length / (self.resolution) - - def update(self, start): - self.start = start - - def __getitem__(self, index): - """ - Get iter overriding - - @param index list : index to get - @return point with given index - """ - return self.__coords.__getitem__(index) - - def __str__(self): - """ - ToString method. - """ - s = str(self.dimension) +\ - "D cartesian grid with resolution equal to : " +\ - str(self.resolution) + ".\n" - return s - -if __name__ == "__main__": - print __doc__ - print "- Provided class : CartesianGrid" - print CartesianGrid.__doc__ diff --git a/HySoP/hysop/domain/mesh.py b/HySoP/hysop/domain/mesh.py new file mode 100644 index 0000000000000000000000000000000000000000..36b57e6ef508103ba500bbd0d67700b8cc51a372 --- /dev/null +++ b/HySoP/hysop/domain/mesh.py @@ -0,0 +1,125 @@ +""" +@package parmepy.domain.mesh +Cartesian mesh class for local mesh. +@todo Remove LocalMesh or SubMesh +@todo rename to CartesianMesh +@todo write tests in parmepy/domain/tests/test_mesh.py +""" +from parmepy.constants import np, PARMES_REAL + + +class LocalMesh(object): + """ + Compute a local mesh resolution, using MPI process grid (topology) + and the global mesh resolution. + It also depends on ghost layer witdh and boundary conditions type. + """ + def __init__(self, rank, resolution, start, + dom_size, dom_origin, ghosts=np.zeros((3))): + # Current process rank in the topology that owns this mesh + self.rank = rank + # Local resolution + self.resolution = resolution.copy() + 2 * ghosts + # index of the lower point (in each dir) in the global mesh + self.start = start # - ghosts + # index of the upper point (in each dir), global mesh + self.end = self.start + self.resolution - 1 + # Mesh step size + self.size = dom_size + # Mesh coordinates + coord_start = self.start * self.size + dom_origin - ghosts * self.size + self.origin = coord_start + coord_end = (self.end + 1) * self.size \ + + dom_origin - ghosts * self.size +# print 'Print TOPO' +# print 'resolution', self.resolution , 'shape', +# (coord_end-coord_start)/self.size, 'coord_end', +# coord_end, 'coord_start',coord_start + if self.start.size == 3: + self.coords = np.ogrid[coord_start[0]:coord_end[0]:self.size[0], + coord_start[1]:coord_end[1]:self.size[1], + coord_start[2]:coord_end[2]:self.size[2]] + elif self.start.size == 2: + self.coords = np.ogrid[coord_start[0]:coord_end[0]:self.size[0], + coord_start[1]:coord_end[1]:self.size[1]] + else: + self.coords = np.ogrid[coord_start[0]:coord_end[0]:self.size[0]] + + def __str__(self): + """ Local mesh display """ + s = '[' + str(self.rank) + '] Local mesh resolution :' \ + + str(self.resolution) + '\n' + + s += 'Global indices :' + str(self.start) + s += ' -- ' + str(self.end) + '\n' + return s + + +class SubMesh(object): + """ + Description of a local mesh, on one process of + the topology. + """ + def __init__(self, topo, g_start, resolution): + + ## Topology that creates (and owns) this mesh + self._topology = topo + ## Local resolution of this mesh, including ghost points + self.resolution = resolution + ## Dimension of the mesh + self.dim = self.resolution.size + ## index of the lowest point of this mesh + ## (in each dir) in the global mesh + self.global_start = g_start + ## index of the upper point (in each dir), global mesh + self.global_end = self.global_start + self.resolution - 1 + ## Mesh step size + self.space_step_size = topo.domain.length / \ + (topo.globalMeshResolution - 1) + ## Mesh local indices, only for "computed" points + ## (i.e. excluding ghosts) + self.local_start = topo.ghosts.copy() + self.local_end = self.resolution.copy() - topo.ghosts[:] - 1 + ## Coordinates of the "lowest" point of this mesh (including ghost) + ## Warning FP : this strongly depends on where is the origin + ## of the domain, of the type of boundary conditions + ## and if origin is on ghosts or "real" points. + self.origin = topo.domain.origin.copy() + self.origin[:] += self.space_step_size[:] * \ + (self.global_start[:].astype(np.int32) - topo.ghosts[:]) + + self.end = self.origin + self.space_step_size * (self.resolution - 1) + if self.dim == 1: + self.coords = np.linspace(self.origin, self.end, self.resolution) + self.step = self.coords[1] = self.coords[0] + elif self.dim == 2: + cx = np.linspace(self.origin[0], self.end[0], + self.resolution[0])[:, np.newaxis] + cy = np.linspace(self.origin[1], self.end[1], + self.resolution[1])[np.newaxis, :] + self.coords = tuple([cx, cy]) + self.step = np.asarray([cx[1, 0] - cx[0, 0], + cy[0, 1] - cy[0, 0]], + dtype=PARMES_REAL) + elif self.dim == 3: + cx = np.linspace(self.origin[0], self.end[0], + self.resolution[0])[:, np.newaxis, np.newaxis] + cy = np.linspace(self.origin[1], self.end[1], + self.resolution[1])[np.newaxis, :, np.newaxis] + cz = np.linspace(self.origin[2], self.end[2], + self.resolution[2])[np.newaxis, np.newaxis, :] + self.coords = tuple([cx, cy, cz]) + self.step = np.asarray([cx[1, 0, 0] - cx[0, 0, 0], + cy[0, 1, 0] - cy[0, 0, 0], + cz[0, 0, 1] - cz[0, 0, 0]], + dtype=PARMES_REAL) + + def __str__(self): + """ Sub mesh display """ + s = 'Coords:' + str(self._topology.coords[:]) + s += ' Sub mesh resolution:' + str(self.resolution) + '\n' + s += 'Starting point global indices :' + str(self.global_start) + '\n' + s += 'Local indices for "computed" points (excluding ghosts) :\n' + s += ' start = ' + str(self.local_start) + s += ', end = ' + str(self.local_end) + '\n' + return s diff --git a/HySoP/hysop/obstacle/obstacle.py b/HySoP/hysop/domain/obstacle.py similarity index 96% rename from HySoP/hysop/obstacle/obstacle.py rename to HySoP/hysop/domain/obstacle.py index f96ce0bc37fc9f218628e1b030b2208dbdd52d14..61654cd2e54cc9bcfdea19eaba768c1e9434f29c 100644 --- a/HySoP/hysop/obstacle/obstacle.py +++ b/HySoP/hysop/domain/obstacle.py @@ -1,8 +1,7 @@ """ -Obstacles representation +Obstacles representation. """ -import numpy as np -from parmepy.constants import PARMES_REAL +from parmepy.constants import np, PARMES_REAL from obstacle_d import Obstacle_d diff --git a/HySoP/hysop/obstacle/obstacle_d.py b/HySoP/hysop/domain/obstacle_d.py similarity index 94% rename from HySoP/hysop/obstacle/obstacle_d.py rename to HySoP/hysop/domain/obstacle_d.py index 572a4482d19fa59bf72ba94ef888a59fdf8482f9..575a993844f0c9b71d2c66025bf702b121bba90c 100644 --- a/HySoP/hysop/obstacle/obstacle_d.py +++ b/HySoP/hysop/domain/obstacle_d.py @@ -1,11 +1,12 @@ """ Discrete obstacles description +@todo write test in parmepy/domain/test/test_obstacle.py """ -import numpy as np +from parmepy.constants import np class Obstacle_d(object): - """ + """ Discrete obstacles representation. """ @@ -55,13 +56,14 @@ class Obstacle_d(object): self.chiPorous = None def chiFunctions(self): - """ + """ Compute the chi functions associated to z-boundaries and solid """ # boolOrientation=[np.bool(0) for i in range (6)] # boolOrientation[self.indOrientation(self.orientation)]=True ## Temporary arrays + #TODO:utiliser des tableaux numpy ! chiBoundaryTmp = [] chiSolidTmp = [] chiPorousTmp = [] @@ -70,14 +72,14 @@ class Obstacle_d(object): step = self.topology.mesh.size ghosts = self.topology.ghosts - coord_start = self.topology.mesh.origin + coord_start = self.topology.mesh.origin coord_end = self.topology.mesh.end * step + self.topology.domain.origin - (ghosts * step) - layerMin = coord_start[2] + ghosts[2] * step[2] + self.zlayer + layerMin = coord_start[2] + ghosts[2] * step[2] + self.zlayer layerMax = coord_end[2] - ghosts[2] * step[2] - self.zlayer radiusMinuslayer = self.radius- self.porousLayer print 'start, end, layerMin, layerMax', coord_start, coord_end, layerMin, layerMax for k in xrange (ghosts[2], self.resolution[2] - ghosts[2]): - rz = coord_start[2] + step[2] * k + rz = coord_start[2] + step[2] * k for j in xrange (ghosts[1], self.resolution[1] - ghosts[1]): ry = coord_start[1] + step[1] * j for i in xrange (ghosts[0], self.resolution[0] - ghosts[0]): @@ -91,12 +93,12 @@ class Obstacle_d(object): rx = coord_start[0] + step[0] * i dist = np.sqrt((rx - self.center[0]) **2 + (ry - self.center[1]) **2 + - (rz - self.center[2]) **2) + (rz - self.center[2]) **2) if (self.name=='sphere'): if (radiusMinuslayer < dist and dist <= self.radius + 1E-12 and self.porousLayer != 0.): # we are in the porous area of the sphere: chiPorousTmp.append([i,j,k]) -# chiPorousTmp0.append(i) +# chiPorousTmp0.append(i) # chiPorousTmp1.append(j) # chiPorousTmp2.append(k) if (dist <= radiusMinuslayer + 1E-12): diff --git a/HySoP/hysop/domain/tests/test_box.py b/HySoP/hysop/domain/tests/test_box.py new file mode 100644 index 0000000000000000000000000000000000000000..fab4b8332912a8d6ea3dee57591a7c1eb258d496 --- /dev/null +++ b/HySoP/hysop/domain/tests/test_box.py @@ -0,0 +1,26 @@ +""" +Testing parmepy.domain.box.Box +""" +import parmepy as pp +from parmepy.constants import np + + +def test_create_box1(): + """Test default parameters""" + dom = pp.box.Box() + assert dom.dimension == 3 + assert np.allclose(dom.length, np.ones_like(dom.length)) + assert np.allclose(dom.origin, np.zeros_like(dom.origin)) + assert [b == pp.PERIODIC for b in dom.boundaries] + + +def test_create_box2(): + """Test given parameters""" + L = np.asarray([1., 2.5]) + ori = np.asarray([1, 2.1]) + dom = pp.box.Box(2, length=L, origin=ori) + assert dom.dimension == 2 + assert np.allclose(dom.length, L) + assert np.allclose(dom.origin, ori) + assert [b == pp.PERIODIC for b in dom.boundaries] + diff --git a/HySoP/hysop/domain/tests/test_domain.py b/HySoP/hysop/domain/tests/test_domain.py deleted file mode 100644 index 0ff83f1868c8e5e420b7837d0a3d0bf8f5a549a2..0000000000000000000000000000000000000000 --- a/HySoP/hysop/domain/tests/test_domain.py +++ /dev/null @@ -1,35 +0,0 @@ -import parmepy as pp -import numpy as np - - -tol = np.finfo(np.double).eps - - -class TestBox: - - def test_create_box1(self): - dom = pp.Box() - assert dom.dimension == 3 - assert np.allclose(dom.length, 1.0, atol=tol) - testList = dom.origin == 0 - assert testList.all() - assert dom.boundaries.all() == pp.PERIODIC - - def test_create_box2(self): - L = np.asarray([1., 2.5]) - ori = np.asarray([1, 2.1]) - dom = pp.Box(2, length=L, origin=ori) - assert dom.dimension == 2 - assert np.allclose(dom.length[:], L, atol=tol) - testList = dom.origin == ori - assert testList.all() - assert dom.boundaries.all() == pp.PERIODIC - - def test_discretizeBox(self): - dom = pp.Box() - resolution = [12, 12, 12] - discrdom = dom.discretize(resolution) - assert type(discrdom) is pp.CartesianGrid - testList = np.asarray(discrdom.resolution) == resolution - assert testList.all() - assert discrdom.domain == dom diff --git a/HySoP/hysop/domain/tests/test_mesh.py b/HySoP/hysop/domain/tests/test_mesh.py new file mode 100644 index 0000000000000000000000000000000000000000..98454241057f3daf8596b39611bcab6acdf4784a --- /dev/null +++ b/HySoP/hysop/domain/tests/test_mesh.py @@ -0,0 +1,6 @@ +""" +Testing parmepy.domain.mesh.CartesianMesh +""" +import parmepy as pp +from parmepy.constants import np + diff --git a/HySoP/hysop/domain/tests/test_obstacle.py b/HySoP/hysop/domain/tests/test_obstacle.py new file mode 100644 index 0000000000000000000000000000000000000000..8bd41f40eaf026737f9bfb48f6f9689163eed671 --- /dev/null +++ b/HySoP/hysop/domain/tests/test_obstacle.py @@ -0,0 +1,6 @@ +""" +Testing parmepy.domain.obstacle.Obstacle +""" +import parmepy as pp +from parmepy.constants import np + diff --git a/HySoP/hysop/fields/analytical.py b/HySoP/hysop/fields/analytical.py index b8c04f6c8fb0d510c1ec89b3f9445e09a8e2f55b..5d6780c4c4c4753a456fafb83da1622cab22b7a9 100644 --- a/HySoP/hysop/fields/analytical.py +++ b/HySoP/hysop/fields/analytical.py @@ -3,12 +3,10 @@ Continuous variable description defined with an analytic formula. """ -from .continuous import ContinuousField -from .discrete import ScalarField -from .discrete import VectorField +from parmepy.fields.continuous import Field -class AnalyticalField(ContinuousField): +class AnalyticalField(Field): """ A variable (continuous) defined with an analytic formula. @@ -25,7 +23,7 @@ class AnalyticalField(ContinuousField): @note formula is used in ScalarField or VectorField as vectorized function by numpy. """ - ContinuousField.__init__(self, domain, name, vector) + Field.__init__(self, domain, name, vector) ## Analytic formula. self.formula = formula @@ -49,8 +47,8 @@ class AnalyticalField(ContinuousField): def __str__(self): """ToString method""" s = " Analytical variable (ContinuousField)\n" - s += " dimension={0}\n".format(self.dimension) - return s + "\n" + s += Field.__str__(self) + return s if __name__ == "__main__": print __doc__ diff --git a/HySoP/hysop/fields/continuous.py b/HySoP/hysop/fields/continuous.py index 75f0925b23815dbb9883bd00519d881a114e349b..bd6a6f70f681fe3020c3ae1380f04039f23cd768 100644 --- a/HySoP/hysop/fields/continuous.py +++ b/HySoP/hysop/fields/continuous.py @@ -3,16 +3,16 @@ Continuous variable description """ -from .discrete import ScalarField -from .discrete import VectorField +from parmepy.fields.scalar import ScalarField +from parmepy.fields.vector import VectorField -class ContinuousField(object): +class Field(object): """ A variable defined on a physical domain """ def __init__(self, domain, name="?", vector=False): """ - Create a ContinuousField. + Create a Field. @param domain : variable application domain. @param name : name of the variable (used for vtk output). @@ -30,25 +30,37 @@ class ContinuousField(object): self._fieldId = -1 ## List of the various discretizations of this field self.discreteField = [] + ## List of the various topologies that discretizations are based on + self.topologies = [] ## Is field is a vector field self.vector = vector def discretize(self, topology=None): """ - Discretization of the field on a topology - @param topology : Topology definition to use - @return discrete field and its index + Discretization of the field on a topology. + @param topology : Topology definition to use. + @return discrete field and its index. + + Do not create an other DiscreteField object if topology already used. """ - self._fieldId += 1 - if self.vector: - self.discreteField.append(VectorField(self, topology, - name=self.name + "_D_" + str(self._fieldId), - idFromParent=self._fieldId)) + if topology in self.topologies: + ## finds fieldID for corresponding topology + fId = [df.topology for df in self.discreteField].index(topology) else: - self.discreteField.append(ScalarField(self, topology, - name=self.name + "_D_" + str(self._fieldId), - idFromParent=self._fieldId)) - return self.discreteField[self._fieldId], self._fieldId + self._fieldId += 1 + fId = self._fieldId + if self.vector: + self.discreteField.append( + VectorField(self, topology, + name=self.name + "_D_" + str(fId), + idFromParent=fId)) + else: + self.discreteField.append( + ScalarField(self, topology, + name=self.name + "_D_" + str(fId), + idFromParent=fId)) + self.topologies.append(topology) + return self.discreteField[fId], fId def initialize(self): """ diff --git a/HySoP/hysop/fields/discrete.py b/HySoP/hysop/fields/discrete.py index 1a776e2d1140030de18b3e783c8ffa9cf9c5a7ff..5dcfe1393aa1281096ecd32ee4f05aa884c7b076 100644 --- a/HySoP/hysop/fields/discrete.py +++ b/HySoP/hysop/fields/discrete.py @@ -3,22 +3,17 @@ Discretes variables (scalar and vectors) descriptions. """ -import numpy as np -from parmepy.constants import ORDER, PARMES_REAL +from abc import ABCMeta, abstractmethod -#import evtk.hl as evtk +class DiscreteField(object): + """Abstract description of discrete field. """ -class ScalarField(object): - """ - A scalar field, defined on each grid of each - subdomain of the given topology. - - """ + __metaclass__ = ABCMeta + @abstractmethod def __init__(self, parent, topology=None, name="?", idFromParent=None): - """ - Creates a ScalaField. + """ Creates a ScalaField. @param parent : Continuous field. @param topology : Topology informations @@ -36,209 +31,43 @@ class ScalarField(object): self.dimension = topology.domain.dimension ## Field resolution. self.resolution = self.topology.mesh.resolution - ## Field data numpy array. \todo is numpy array zeros or empty??? - self.data = np.zeros((self.resolution), dtype=PARMES_REAL, order=ORDER) - ## Pointer to gpu data array if needed. - self.gpu_data = None ## @private Continuous field self.__parentField = parent ## @private Index in parent's field discrete fields self.__idFromParent = idFromParent ## Application domain of the variable. - self.domain = self.__parentField.domain.discreteDomain + self.domain = self.topology.mesh ## Is self.data contains data self.contains_data = False ## Scalar is not vector - self.vector = False - - def __str__(self): - """ Display class information. """ - s = '[' + str(self.topology.rank) + '] ' - s += " id " + str(self.__idFromParent) + ", " - s += self.name + " " + str(self.dimension) + 'D discrete field with resolution ' - s += str(self.data.shape) + "." - return s + "\n" + self.vector = None + ## Field data numpy array. + self.data = None + @abstractmethod def __getitem__(self, i): - """ - Access to the content of the field. - @param i : requested index. - @return data[i]. - """ - return self.data.__getitem__(i) + """ Access to the content of the field.""" + raise ValueError("Abstract method") + @abstractmethod def __setitem__(self, i, value): - """ - Set the content of the field component at position i. - - @param i : requested index. - @param value : value to set. - - Usage :\n - A = ScalarField(topo)\n - A[2,1,1] = 12.0 # Calls A.data[2,1,1] = 12.0 - """ - self.data.__setitem__(i, value) + """ Access to the content of the field.""" + raise ValueError("Abstract method") def initialize(self, formula=None): - """ - Initialize values with given formula. - - @param formula : formula to apply on coordinates. - - Formula need to be vectorized by numpy. Therefore formula must have the following signature: - @li 3D : float formula(float x, float y, float z) - @li 2D : float formula(float x, float y) - where x,y,z are point coordinates. - """ - if formula is not None: - print "...", - v_formula = np.vectorize(formula) - if self.dimension == 3: - self.data = v_formula(self.topology.mesh.coords[0][0:self.resolution[0],0:self.resolution[1],0:self.resolution[2]], - self.topology.mesh.coords[1][0:self.resolution[0],0:self.resolution[1],0:self.resolution[2]], - self.topology.mesh.coords[2][0:self.resolution[0],0:self.resolution[1],0:self.resolution[2]]) - elif self.dimension == 2: - self.data = v_formula(self.topology.mesh.coords[0][0:self.resolution[0],0:self.resolution[1]], - self.topology.mesh.coords[1][0:self.resolution[0],0:self.resolution[1]]) - else: - self.data = v_formula(self.topology.mesh.coords[0][0:self.resolution[0]]) - self.contains_data = True - else: - print "No formula", - - -class VectorField(object): - """ - A vector field, defined on each grid of each subdomain of the given topology. - Vector field is composed of several ScalarFields - """ - - def __init__(self, parent, topology=None, name="?", idFromParent=None): - """ - Creates a VectorField. - - @param parent : Continuous field. - @param topology : Topology informations - @param name : Field name - @param idFromParent : Index in the parent's discrete fields - """ - if(topology is not None): - ## Topology informations. - self.topology = topology - else: - raise NotImplementedError() - ## Field name. - self.name = name - ## Field dimension. - self.dimension = topology.domain.dimension - ## Field resolution. - self.resolution = self.topology.mesh.resolution - ## Field data numpy array. \todo is numpy array zeros or empty??? - self.data = [np.zeros((self.resolution), dtype=PARMES_REAL, order=ORDER) for d in xrange(self.dimension)] - ## Pointer to gpu data arrays if needed. - self.gpu_data = [None for d in xrange(self.dimension)] - ## @private Continuous field - self.__parentField = parent - ## @private Index in parent's field discrete fields - self.__idFromParent = idFromParent - ## Application domain of the variable. - self.domain = self.__parentField.domain.discreteDomain - ## Is self.data contains data - self.contains_data = False - ## Scalar is not vector - self.vector = True + """Initialize values with given formula.""" + raise ValueError("Abstract method") - def __str__(self): - """ Display class information """ - s = '[' + str(self.topology.rank) + '] ' - s += " id " + str(self.__idFromParent) + ", " - s += self.name + " " + str(self.dimension) + 'D discrete vector field with resolution ' - s += str([data.shape for data in self.data]) + "." - return s + "\n" + def get_data_method(self): + pass - def __getitem__(self, i): - """ Access to the content of the field. - - @param i : requested index. - @return data[i] depending on i. - - Usage (3D): \n - A = VectorField(...), We have 3 components len(a.data) == 3. \n - Following instructions access to index 2,1,1 of y component: - @li A[1,2,1,1] - @li A[1][2,1,1] - @li Access to whole vector of index 2,1,1: A[2,1,1] - @note Access to all datas as an array : A[:,:,:] (resulting shape is (dim, Nx, Ny, Nz)) - @note Access to all datas in y component as an array : A[1][:,:,:] (resulting shape is (Nx, Ny, Nz)) - """ - try: - if len(i) == len(self.data): - return np.asarray([data.__getitem__(i) for data in self.data]) - else: - return self.data[i[0]].__getitem__(tuple(i[1:])) - except (TypeError): - return self.data[i] - def __setitem__(self, i, value): - """ - Set the content of the vector field component at position i. - - @param i : requested index. - @param value : value to set. +if __name__ == "__main__": + print __doc__ + print "- Provided class : Domain (abstract)." + print Domain.__doc__ - Usage :\n - A = VectorField(topo)\n - A[2,1,1] = 12.0 # Calls A.data[d][2,1,1] = 12.0 for all components d.\n - A[2,1,1] = [12.0, 13.0, 14.0] # Calls A.data[0][2,1,1] = 12.0, A.data[1][2,1,1] = 13.0 and A.data[2][2,1,1] = 14.0\n - A[1][2,1,1] = 13.0 # Calls A.data[1][2,1,1] = 12.0 - A[1,2,1,1] = 13.0 # Calls A.data[1][2,1,1] = 12.0 - """ - if len(i) == len(self.data): - try: - [data.__setitem__(i, v) for data, v in zip(self.data, value)] - except (TypeError): - [data.__setitem__(i, value) for data in self.data] - elif len(i) > len(self.data): - self.data[i[0]].__setitem__(tuple(i[1:]), value) - def initialize(self, formula=None): - """ - Initialize values with given formula. - @param formula : formula to apply on coordinates. - Formula need to be vectorized by numpy. - Therefore formula must have the following signature: - @li 3D : float, float, float formula(float x, float y, float z) - @li 2D : float, float formula(float x, float y) - where x,y,z are point coordinates. - """ - if formula is not None: - print "...", - v_formula = np.vectorize(formula) - if self.dimension == 3: - self.data[0], self.data[1], self.data[2] = \ - v_formula(*self.topology.mesh.coords) - self.data[0] = np.asarray(self.data[0], - dtype=PARMES_REAL, order=ORDER) - self.data[1] = np.asarray(self.data[1], - dtype=PARMES_REAL, order=ORDER) - self.data[2] = np.asarray(self.data[2], - dtype=PARMES_REAL, order=ORDER) - elif self.dimension == 2: - self.data[0], self.data[1] = \ - v_formula(*self.topology.mesh.coords) - self.data[0] = np.asarray(self.data[0], - dtype=PARMES_REAL, order=ORDER) - self.data[1] = np.asarray(self.data[1], - dtype=PARMES_REAL, order=ORDER) - - self.contains_data = True - else: - print "No formula", -if __name__ == "__main__": - print __doc__ - print "- Provided class : ScalarField, VectorField" - print ScalarField.__doc__ diff --git a/HySoP/hysop/fields/gpu_discrete.py b/HySoP/hysop/fields/gpu_discrete.py new file mode 100644 index 0000000000000000000000000000000000000000..b47c7af5d93562830850b7d6ce892de9d76706c1 --- /dev/null +++ b/HySoP/hysop/fields/gpu_discrete.py @@ -0,0 +1,228 @@ +""" +@package parmepy.fields.gpu_discrete +""" +from parmepy.constants import ORDER, np +from parmepy.gpu import cl, PARMES_REAL_GPU +from parmepy.fields.vector import VectorField +from parmepy.fields.scalar import ScalarField + + +class GPUDiscreteField(object): + def __init__(self, queue): + self.queue = queue + self.gpu_data = None + print self.name, "buffer allocation " + self.mem_size = 0 + + +class GPUVectorField(VectorField, GPUDiscreteField): + """ + GPU Discrete field implementation + """ + def __init__(self, queue, parent, + topology=None, name="?", idFromParent=None, + precision=PARMES_REAL_GPU): + super(GPUVectorField, self).__init__(self, + parent, topology, + name, idFromParent) + self.gpu_data = [None for d in xrange(self.dimension)] + for d in xrange(self.dimension): + self.data[d] = np.asarray(self.data[d], + dtype=precision, order=ORDER) + self.gpu_data[d] = cl.Buffer(self.queue.context, + cl.mem_flags.READ_WRITE, + size=self.data[d].nbytes) + self.mem_size += self.gpu_data[d].size + + @classmethod + def fromVectorField(cls, queue, vfield, precision=PARMES_REAL_GPU): + vfield.__class__ = cls + GPUDiscreteField.__init__(vfield, queue) + vfield.gpu_data = [None for d in xrange(vfield.dimension)] + for d in xrange(vfield.dimension): + vfield.data[d] = np.asarray(vfield.data[d], + dtype=precision, order=ORDER) + vfield.gpu_data[d] = cl.Buffer(vfield.queue.context, + cl.mem_flags.READ_WRITE, + size=vfield.data[d].nbytes) + vfield.mem_size += vfield.gpu_data[d].size + + def toDevice(self): + """ + Host to device method. + + @param queue : OpenCL queue. + + Performs a direct OpenCL copy from numpy arrays + to OpenCL Buffers.\n + Arrange memory on device so that vector components are + contiguous in the direction of the component.\n + Example : A 3D vector field F(x,y,z) is made up of 3 + OpenCL Buffers Fx, Fy, Fz. The memory layout is : + - Fx : x-major ordering. On device, + Fx[i + j*WIDTH + k*WIDTH*HEIGHT] access to Fx(i,j,k) + - Fy : y-major ordering. On device, + Fy[i + j*WIDTH + k*WIDTH*HEIGHT] access to Fy(j,i,k) + - Fz : z-major ordering. On device, + Fz[i + j*WIDTH + k*WIDTH*HEIGHT] access to Fz(k,i,j) + """ + print "host->device :", self.name, + time = 0. + evt = [None, None, None] + evt[0] = cl.enqueue_copy( + self.queue, self.gpu_data[0], + self.data[0].ravel(order=ORDER)) + evt[1] = cl.enqueue_copy( + self.queue, self.gpu_data[1], + self.data[1].swapaxes(0, 1).ravel(order=ORDER)) + if self.dimension == 3: + evt[2] = cl.enqueue_copy( + self.queue, self.gpu_data[2], + self.data[2].swapaxes(0, 1).swapaxes(0, 2).ravel(order=ORDER)) + self.queue.finish() + for e in evt: + if e is not None: + time += (e.profile.end - e.profile.start) * 1e-9 + self.data_on_device = True + print self.mem_size, "Bytes transfered at ", + print "{0:.3f} GBytes/sec".format( + self.mem_size / (time * 1024 ** 3)) + return self.mem_size, time + + def toHost(self): + """ + Device to host method. + + @param queue : OpenCL queue. + @param discreteField : Variable to transfer. + + Performs a direct OpenCL copy from OpenCL Buffers + to numpy arrays.\n + As memory layout is arranged on device, not only a + copy is performed but also transpositions to have numpy + arrays consistent to each other. + """ + if not self.contains_data: + print "device->host :", self.name, + time = 0. + evt = [None, None, None] + # Use of discreteField.data[XDIR] as temporary array + #\todo Need cleaner solution + # Get ZDIR component + if self.dimension == 3: + evt[2] = cl.enqueue_copy(self.queue, self.data[0], + self.gpu_data[2]) + self.data[2] = self.data[0]\ + .swapaxes(0, 2).swapaxes(0, 1) + # Get YDIR component + evt[1] = cl.enqueue_copy(self.queue, self.data[0], + self.gpu_data[1]) + self.data[1] = self.data[0].swapaxes(0, 1) + # Get XDIR component + evt[0] = cl.enqueue_copy(self.queue, self.data[0], + self.gpu_data[0]) + self.queue.finish() + for e in evt: + if e is not None: + time += (e.profile.end - e.profile.start) * 1e-9 + self.data_on_device = False + print self.mem_size, "Bytes transfered at ", + print "{0:.3f} GBytes/sec".format( + self.mem_size / (time * 1024 ** 3)) + return self.mem_size, time + + +class GPUScalarField(ScalarField, GPUDiscreteField): + """ + GPU Discrete field implementation + """ + def __init__(self, queue, parent, + topology=None, name="?", idFromParent=None, + precision=PARMES_REAL_GPU): + super(GPUScalarField, self).__init__(self, + parent, topology, + name, idFromParent) + self.queue = queue + self.gpu_data = None + print self.name, "buffer allocation " + self.mem_size = 0 + self.data = np.asarray(self.data, + dtype=precision, order=ORDER) + self.gpu_data = cl.Buffer(self.queue.context, + cl.mem_flags.READ_WRITE, + size=self.data.nbytes) + self.mem_size = self.gpu_data.size + + @classmethod + def fromScalarField(cls, queue, sfield, precision=PARMES_REAL_GPU): + sfield.__class__ = cls + GPUDiscreteField.__init__(sfield, queue) + sfield.data = np.asarray(sfield.data, + dtype=precision, order=ORDER) + sfield.gpu_data = cl.Buffer(sfield.queue.context, + cl.mem_flags.READ_WRITE, + size=sfield.data.nbytes) + sfield.mem_size = sfield.gpu_data.size + + def toDevice(self): + """ + Host to device method. + + @param queue : OpenCL queue. + + Performs a direct OpenCL copy from numpy arrays + to OpenCL Buffers.\n + Arrange memory on device so that vector components are + contiguous in the direction of the component.\n + Example : A 3D vector field F(x,y,z) is made up of 3 + OpenCL Buffers Fx, Fy, Fz. The memory layout is : + - Fx : x-major ordering. On device, + Fx[i + j*WIDTH + k*WIDTH*HEIGHT] access to Fx(i,j,k) + - Fy : y-major ordering. On device, + Fy[i + j*WIDTH + k*WIDTH*HEIGHT] access to Fy(j,i,k) + - Fz : z-major ordering. On device, + Fz[i + j*WIDTH + k*WIDTH*HEIGHT] access to Fz(k,i,j) + """ + print "host->device :", self.name, + time = 0. + evt = [None, None, None] + evt[0] = cl.enqueue_copy(self.queue, self.gpu_data, + self.data.ravel(order=ORDER)) + self.queue.finish() + for e in evt: + if e is not None: + time += (e.profile.end - e.profile.start) * 1e-9 + self.data_on_device = True + print self.mem_size, "Bytes transfered at ", + print "{0:.3f} GBytes/sec".format( + self.mem_size / (time * 1024 ** 3)) + return self.mem_size, time + + def toHost(self): + """ + Device to host method. + + @param queue : OpenCL queue. + @param discreteField : Variable to transfer. + + Performs a direct OpenCL copy from OpenCL Buffers + to numpy arrays.\n + As memory layout is arranged on device, not only a + copy is performed but also transpositions to have numpy + arrays consistent to each other. + """ + if not self.contains_data: + print "device->host :", self.name, + time = 0. + evt = [None, None, None] + evt[0] = cl.enqueue_copy(self.queue, self.data, + self.gpu_data) + self.queue.finish() + for e in evt: + if e is not None: + time += (e.profile.end - e.profile.start) * 1e-9 + self.data_on_device = False + print self.mem_size, "Bytes transfered at ", + print "{0:.3f} GBytes/sec".format( + self.mem_size / (time * 1024 ** 3)) + return self.mem_size, time diff --git a/HySoP/hysop/fields/gpuscalar.py b/HySoP/hysop/fields/gpuscalar.py new file mode 100644 index 0000000000000000000000000000000000000000..a955256ebc0e151f4cc5a74b6a9bfee790b8ac1a --- /dev/null +++ b/HySoP/hysop/fields/gpuscalar.py @@ -0,0 +1,37 @@ +""" +@package parmepy.fields.gpuscalar + +Discrete variable scalar description for GPU. +@todo missing tests +""" +from parmepy.constants import np, ORDER, PARMES_REAL +from parmepy.fields.discrete import DiscreteField + + +class GPUScalarField(DiscreteField, GPUDiscreteField): + """ + A scalar field, defined on each grid of each + subdomain of the given topology. + + """ + + def __init__(self, *args, **kwargs): + """Constructor for a scalar field""" + super(GPUScalarField, self).__init__(self, *args, **kwargs) + self.vector = False + self.data = np.zeros((self.resolution), dtype=PARMES_REAL, order=ORDER) + + def __str__(self): + """ Display class information. """ + s = '[' + str(self.topology.rank) + '] ' + s += " id " + str(self.__idFromParent) + ", " + s += self.name + " " + str(self.dimension) + s += 'D gpu discrete field with resolution ' + s += str(self.data.shape) + "." + return s + "\n" + + +if __name__ == "__main__": + print __doc__ + print "- Provided class : Scalar" + print ScalarField.__doc__ diff --git a/HySoP/hysop/fields/gpuvector.py b/HySoP/hysop/fields/gpuvector.py new file mode 100644 index 0000000000000000000000000000000000000000..84b4a595874eb838ba80952decf20c9d297f483d --- /dev/null +++ b/HySoP/hysop/fields/gpuvector.py @@ -0,0 +1,39 @@ +""" +@package parmepy.fields.gpuvector + +Discrete variable scalar description fot GPU. +@todo missing test +""" +from parmepy.constants import np, ORDER, PARMES_REAL +from parmepy.fields.discrete import DiscreteField + + +class VectorField(DiscreteField): + """ + A vector field, defined on each grid of each subdomain + of the given topology. + Vector field is composed of several ScalarFields + """ + + def __init__(self, *args, **kwargs): + """Constructor for a vector field""" + super(VectorField, self).__init__(self, *args, **kwargs) + self.data = [np.zeros((self.resolution), + dtype=PARMES_REAL, order=ORDER) + for d in xrange(self.dimension)] + self.vector = True + + def __str__(self): + """ Display class information """ + s = '[' + str(self.topology.rank) + '] ' + s += " id " + str(self.__idFromParent) + ", " + s += self.name + " " + str(self.dimension) + s += 'D gpu discrete vector field with resolution ' + s += str([data.shape for data in self.data]) + "." + return s + "\n" + + +if __name__ == "__main__": + print __doc__ + print "- Provided class : VectorField" + print ScalarField.__doc__ diff --git a/HySoP/hysop/fields/scalar.py b/HySoP/hysop/fields/scalar.py new file mode 100644 index 0000000000000000000000000000000000000000..18ce6931d7708c2b235d2dc2a5a094960d3183fe --- /dev/null +++ b/HySoP/hysop/fields/scalar.py @@ -0,0 +1,82 @@ +""" +@package parmepy.fields.scalar + +Discrete variable scalar description. +""" +from parmepy.constants import np, ORDER, PARMES_REAL +from parmepy.fields.discrete import DiscreteField + + +class ScalarField(DiscreteField): + """ + A scalar field, defined on each grid of each + subdomain of the given topology. + + """ + + def __init__(self, parent, topology=None, name="?", idFromParent=None): + """Constructor for a scalar field""" + super(ScalarField, self).__init__(parent, topology, + name, idFromParent) + self.vector = False + self.data = np.zeros((self.resolution), dtype=PARMES_REAL, order=ORDER) + + def __getitem__(self, i): + """ + Access to the content of the field. + @param i : requested index. + @return data[i]. + """ + return self.data.__getitem__(i) + + def __setitem__(self, i, value): + """ + Set the content of the field component at position i. + + @param i : requested index. + @param value : value to set. + + Usage :\n + A = ScalarField(topo)\n + A[2,1,1] = 12.0 # Calls A.data[2,1,1] = 12.0 + """ + self.data.__setitem__(i, value) + + def initialize(self, formula=None): + """ + Initialize values with given formula. + + @param formula : formula to apply on coordinates. + + Formula need to be vectorized by numpy. + Therefore formula must have the following signature: + @li 3D : float formula(float x, float y, float z) + @li 2D : float formula(float x, float y) + where x,y,z are point coordinates. + """ + if formula is not None: + print "...", + v_formula = np.vectorize(formula) + if self.dimension == 3: + self.data = v_formula(*self.topology.mesh.coords) + elif self.dimension == 2: + self.data = v_formula(*self.topology.mesh.coords) + else: + self.data = v_formula(self.topology.mesh.coords) + self.contains_data = True + else: + print "No formula", + + def __str__(self): + """ Display class information. """ + s = '[' + str(self.topology.rank) + '] ' + s += self.name + " " + str(self.dimension) + s += 'D discrete field with resolution ' + s += str(self.data.shape) + "." + return s + "\n" + + +if __name__ == "__main__": + print __doc__ + print "- Provided class : Scalar" + print ScalarField.__doc__ diff --git a/HySoP/hysop/fields/tests/__init__.py b/HySoP/hysop/fields/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..64b9577969b28b74de5e67cfc63fb4bec85a6652 --- /dev/null +++ b/HySoP/hysop/fields/tests/__init__.py @@ -0,0 +1,8 @@ +""" +@package parmepy.domain +@file parmepy/domain/__init__.py + +Everything concerning physical domains, their discretizations +and MPI topologies. + +""" diff --git a/HySoP/hysop/fields/tests/test_discretefield.py b/HySoP/hysop/fields/tests/test_discretefield.py new file mode 100644 index 0000000000000000000000000000000000000000..8b305c1e932e0aaadf3f63e913d99304158a3945 --- /dev/null +++ b/HySoP/hysop/fields/tests/test_discretefield.py @@ -0,0 +1,128 @@ +""" +Testing parmepy.field.scalar.ScalarField, + parmepy.field.scalar.VectorField +""" +import parmepy as pp +from parmepy.constants import np +from parmepy.fields.continuous import Field +from parmepy.fields.analytical import AnalyticalField + + +def test_discretization(): + """Test multiple discretizations""" + dom = pp.box.Box() + csf = Field(dom) + cvf = Field(dom, vector=True) + resolTopo = [33, 33, 17] + topo = pp.topology.Cartesian(dom, 3, resolTopo) + csf.discretize(topo) + cvf.discretize(topo) + assert np.equal(csf.discreteField[0].resolution, + csf.discreteField[0].data.shape).all() + assert not csf.discreteField[0].contains_data + assert np.equal(cvf.discreteField[0].resolution, + cvf.discreteField[0].data[0].shape).all() + assert np.equal(cvf.discreteField[0].resolution, + cvf.discreteField[0].data[1].shape).all() + assert np.equal(cvf.discreteField[0].resolution, + cvf.discreteField[0].data[2].shape).all() + assert not cvf.discreteField[0].contains_data + + +def test_init_scalar(): + dom = pp.box.Box() + csf = Field(dom) + resolTopo = [33, 11, 17] + topo = pp.topology.Cartesian(dom, 3, resolTopo) + csf.discretize(topo) + scalar = lambda x, y, z: x * y * z + true_data = np.zeros_like(csf.discreteField[0].data) + for i in np.arange(resolTopo[0] - 1): + for j in np.arange(resolTopo[1] - 1): + for k in np.arange(resolTopo[2] - 1): + true_data[i, j, k] = scalar(i / ((resolTopo[0] - 1) * 1.), + j / ((resolTopo[1] - 1) * 1.), + k / ((resolTopo[2] - 1) * 1.)) + csf.discreteField[0].initialize(scalar) + assert np.allclose(csf.discreteField[0].data, true_data) + + +def test_init_vector(): + dom = pp.box.Box() + cvf = Field(dom, vector=True) + resolTopo = [33, 11, 17] + topo = pp.topology.Cartesian(dom, 3, resolTopo) + cvf.discretize(topo) + vector = lambda x, y, z: (x ** 2, 1.0 - y, 10 * z + 10.) + true_data_x = np.zeros_like(cvf.discreteField[0].data[0]) + true_data_y = np.zeros_like(cvf.discreteField[0].data[1]) + true_data_z = np.zeros_like(cvf.discreteField[0].data[2]) + for i in np.arange(resolTopo[0] - 1): + for j in np.arange(resolTopo[1] - 1): + for k in np.arange(resolTopo[2] - 1): + true_data_x[i, j, k], \ + true_data_y[i, j, k], \ + true_data_z[i, j, k] = \ + vector(i / ((resolTopo[0] - 1) * 1.), + j / ((resolTopo[1] - 1) * 1.), + k / ((resolTopo[2] - 1) * 1.)) + cvf.discreteField[0].initialize(vector) + assert np.allclose(cvf.discreteField[0].data[0], true_data_x) + assert np.allclose(cvf.discreteField[0].data[1], true_data_y) + assert np.allclose(cvf.discreteField[0].data[2], true_data_z) + + +def test_accessing_vector_element(): + """Vector element access""" + dom = pp.box.Box() + cvf = Field(dom, vector=True) + resolTopo = [33, 11, 17] + topo = pp.topology.Cartesian(dom, 3, resolTopo) + cvf.discretize(topo) + vector = lambda x, y, z: (x ** 2, 1.0 - y, 10 * z + 10.) + true_data_x = np.zeros_like(cvf.discreteField[0].data[0]) + true_data_y = np.zeros_like(cvf.discreteField[0].data[1]) + true_data_z = np.zeros_like(cvf.discreteField[0].data[2]) + cvf.discreteField[0].initialize(vector) + print cvf.discreteField[0][:, :, :].shape + tshape = (3, resolTopo[0] - 1, resolTopo[1] - 1, resolTopo[2] - 1) + assert np.equal(cvf.discreteField[0][:, :, :].shape, tshape).all() + tshape = (resolTopo[0] - 1, resolTopo[1] - 1, resolTopo[2] - 1) + assert np.equal(cvf.discreteField[0][0][:, :, :].shape, tshape).all() + tshape = (3,) + assert np.equal(cvf.discreteField[0][0, 0, 0].shape, tshape).all() + for i in np.arange(resolTopo[0] - 1): + for j in np.arange(resolTopo[1] - 1): + for k in np.arange(resolTopo[2] - 1): + true_data_x[i, j, k], \ + true_data_y[i, j, k], \ + true_data_z[i, j, k] = \ + vector(i / ((resolTopo[0] - 1) * 1.), + j / ((resolTopo[1] - 1) * 1.), + k / ((resolTopo[2] - 1) * 1.)) + assert np.allclose(cvf.discreteField[0][0][i, j, k], + true_data_x[i, j, k]) + assert np.allclose(cvf.discreteField[0][1, i, j, k], + true_data_y[i, j, k]) + assert np.allclose(cvf.discreteField[0][i, j, k], + [true_data_x[i, j, k], + true_data_y[i, j, k], + true_data_z[i, j, k]]) + assert np.allclose(cvf.discreteField[0][0], true_data_x) + assert np.allclose(cvf.discreteField[0][1], true_data_y) + assert np.allclose(cvf.discreteField[0][2], true_data_z) + assert np.allclose(cvf.discreteField[0][0][:, 1, 2], true_data_x[:, 1, 2]) + assert np.allclose(cvf.discreteField[0][0, :, 1, 2], true_data_x[:, 1, 2]) + #set item + cvf.discreteField[0][1, 2, 3] = [1., 2., 3.] + assert np.allclose(cvf.discreteField[0][1, 2, 3], [1., 2., 3.]) + cvf.discreteField[0][0, 1, :] = 5. + true_data = [np.ones_like(cvf.discreteField[0].data[0][0, 1, :]) * 5., + np.ones_like(cvf.discreteField[0].data[0][0, 1, :]) * 5., + np.ones_like(cvf.discreteField[0].data[0][0, 1, :]) * 5.] + assert np.allclose(cvf.discreteField[0][0, 1, :], true_data) + cvf.discreteField[0][0][..., 9] = -1. + assert np.allclose(cvf.discreteField[0][0][..., 9], + np.ones_like(true_data_x[..., 9]) * -1.) + + diff --git a/HySoP/hysop/fields/tests/test_field.py b/HySoP/hysop/fields/tests/test_field.py new file mode 100644 index 0000000000000000000000000000000000000000..fd671a3d98efdb27dce333406f4f98f4bd664386 --- /dev/null +++ b/HySoP/hysop/fields/tests/test_field.py @@ -0,0 +1,42 @@ +""" +Testing parmepy.field.continuous.Field +""" +import parmepy as pp +from parmepy.fields.continuous import Field +from parmepy.fields.analytical import AnalyticalField + + +def test_discretization(): + """Test multiple discretizations""" + dom = pp.box.Box() + cf = Field(dom) + resolTopo = [33, 33, 17] + topo = pp.topology.Cartesian(dom, 3, resolTopo) + cf.discretize(topo) + assert cf._fieldId == 0 + assert len(cf.discreteField) == 1 + assert len(cf.topologies) == 1 + assert cf.topologies[0] == topo + cf.discretize(topo) + assert cf._fieldId == 0 + assert len(cf.discreteField) == 1 + assert len(cf.topologies) == 1 + assert cf.topologies[0] == topo + resolTopo = [11, 17, 19] + topo2 = pp.topology.Cartesian(dom, 3, resolTopo) + cf.discretize(topo2) + assert cf._fieldId == 1 + assert len(cf.discreteField) == 2 + assert len(cf.topologies) == 2 + assert cf.topologies[0] == topo + assert cf.topologies[1] == topo2 + + +def test_analytical(): + """Test formula""" + dom = pp.box.Box() + formula = lambda x, y, z: (x * y * z, 1.0, -x * y * z) + caf = AnalyticalField(dom, formula) + assert caf.value(0., 0., 0.) == (0., 1., 0.) + assert caf.value(1., 1., 1.) == (1., 1., -1.) + assert caf.value(1., 2., 3.) == (6., 1., -6.) diff --git a/HySoP/hysop/fields/vector.py b/HySoP/hysop/fields/vector.py new file mode 100644 index 0000000000000000000000000000000000000000..02ffb0c7c54f44bd19821a7ae1a7c2b4ed642749 --- /dev/null +++ b/HySoP/hysop/fields/vector.py @@ -0,0 +1,122 @@ +""" +@package parmepy.fields.vector + +Discrete variable scalar description. +""" +from parmepy.constants import np, ORDER, PARMES_REAL +from parmepy.fields.discrete import DiscreteField + + +class VectorField(DiscreteField): + """ + A vector field, defined on each grid of each subdomain + of the given topology. + Vector field is composed of several ScalarFields + """ + + def __init__(self, parent, topology=None, name="?", idFromParent=None): + """Constructor for a vector field""" + super(VectorField, self).__init__(parent, topology, + name, idFromParent) + self.data = [np.zeros((self.resolution), + dtype=PARMES_REAL, order=ORDER) + for d in xrange(self.dimension)] + self.vector = True + + def __getitem__(self, i): + """ Access to the content of the field. + + @param i : requested index. + @return data[i] depending on i. + + Usage (3D): \n + A = VectorField(...), We have 3 components len(a.data) == 3. \n + Following instructions access to index 2,1,1 of y component: + @li A[1,2,1,1] + @li A[1][2,1,1] + @li Access to whole vector of index 2,1,1: A[2,1,1] + @note Access to all datas as an array : A[:,:,:] + (resulting shape is (dim, Nx, Ny, Nz)) + @note Access to all datas in y component as an array: + A[1][:,:,:] (resulting shape is (Nx, Ny, Nz)) + """ + try: + if len(i) == len(self.data): + return np.asarray([data.__getitem__(i) for data in self.data]) + else: + return self.data[i[0]].__getitem__(tuple(i[1:])) + except (TypeError): + return self.data[i] + + def __setitem__(self, i, value): + """ + Set the content of the vector field component at position i. + + @param i : requested index. + @param value : value to set. + + Usage :\n + A = VectorField(topo)\n + A[2,1,1] = 12.0 # Calls A.data[d][2,1,1] = 12.0 for all components d.\n + A[2,1,1] = [12.0, 13.0, 14.0] # Calls A.data[0][2,1,1] = 12.0, + A.data[1][2,1,1] = 13.0 and A.data[2][2,1,1] = 14.0\n + A[1][2,1,1] = 13.0 # Calls A.data[1][2,1,1] = 12.0 + A[1,2,1,1] = 13.0 # Calls A.data[1][2,1,1] = 12.0 + """ + if len(i) == len(self.data): + try: + [data.__setitem__(i, v) for data, v in zip(self.data, value)] + except (TypeError): + [data.__setitem__(i, value) for data in self.data] + elif len(i) > len(self.data): + self.data[i[0]].__setitem__(tuple(i[1:]), value) + + def initialize(self, formula=None): + """ + Initialize values with given formula. + + @param formula : formula to apply on coordinates. + + Formula need to be vectorized by numpy. + Therefore formula must have the following signature: + @li 3D : float, float, float formula(float x, float y, float z) + @li 2D : float, float formula(float x, float y) + where x,y,z are point coordinates. + """ + if formula is not None: + print "...", + v_formula = np.vectorize(formula) + if self.dimension == 3: + self.data[0], self.data[1], self.data[2] = \ + v_formula(*self.topology.mesh.coords) + self.data[0] = np.asarray(self.data[0], + dtype=PARMES_REAL, order=ORDER) + self.data[1] = np.asarray(self.data[1], + dtype=PARMES_REAL, order=ORDER) + self.data[2] = np.asarray(self.data[2], + dtype=PARMES_REAL, order=ORDER) + elif self.dimension == 2: + self.data[0], self.data[1] = \ + v_formula(*self.topology.mesh.coords) + self.data[0] = np.asarray(self.data[0], + dtype=PARMES_REAL, order=ORDER) + self.data[1] = np.asarray(self.data[1], + dtype=PARMES_REAL, order=ORDER) + + self.contains_data = True + else: + print "No formula", + + def __str__(self): + """ Display class information """ + s = '[' + str(self.topology.rank) + '] ' + s += self.name + " " + str(self.dimension) + s += 'D discrete vector field with resolution ' + s += str([data.shape for data in self.data]) + "." + return s + "\n" + + +if __name__ == "__main__": + print __doc__ + print "- Provided class : VectorField" + print ScalarField.__doc__ diff --git a/HySoP/hysop/gpu/__init__.py b/HySoP/hysop/gpu/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b0ed4badd6725a7820478519415dc14f638be9bf --- /dev/null +++ b/HySoP/hysop/gpu/__init__.py @@ -0,0 +1,55 @@ +""" +@package parmepy.gpu +@file parmepy/gpu/__init__.py + +Everything concerning GPU in Parmes. + +""" +from parmepy.constants import np, os +import pyopencl as cl + +## GPU deflault sources +GPU_SRC = os.path.join(__path__[0], "cl_src", '') +PARMES_REAL_GPU = np.float32 +PARMES_DOUBLE_GPU = np.float64 +PARMES_INTEGER_GPU = np.uint32 + + +def _check_double_precision_capability(device, prec): + ## Precision supported + print " Precision capability ", + capable = True + if prec is np.float32: + print "for Single Precision: " + for v in ['DENORM', 'INF_NAN', + 'ROUND_TO_NEAREST', 'ROUND_TO_ZERO', 'ROUND_TO_INF', + 'FMA', 'CORRECTLY_ROUNDED_DIVIDE_SQRT', 'SOFT_FLOAT']: + try: + if eval('(device.single_fp_config &' + + ' cl.device_fp_config.' + + v + ') == cl.device_fp_config.' + v): + print v + except AttributeError as ae: + if v is 'CORRECTLY_ROUNDED_DIVIDE_SQRT': + capable = False + print v, 'is not supported in OpenCL C 1.2.\n', + print ' Exception catched : ', ae + else: + print "for Double Precision: " + if device.double_fp_config > 0: + for v in ['DENORM', 'INF_NAN', 'FMA', + 'ROUND_TO_NEAREST', 'ROUND_TO_ZERO', 'ROUND_TO_INF', + 'CORRECTLY_ROUNDED_DIVIDE_SQRT', 'SOFT_FLOAT']: + try: + if eval('(sdevice.double_fp_config &' + + ' cl.device_fp_config.' + + v + ') == cl.device_fp_config.' + v): + print v + except AttributeError as ae: + if v is 'CORRECTLY_ROUNDED_DIVIDE_SQRT': + capable = False + print v, 'is supported in OpenCL C 1.2.\n', + print ' Exception catched : ', ae + else: + raise ValueError("Double Precision is not supported by device") + return capable diff --git a/HySoP/hysop/gpu/cl_src/advection_2D_builtin.cl b/HySoP/hysop/gpu/cl_src/advection_2D_builtin.cl new file mode 100644 index 0000000000000000000000000000000000000000..008be61a924d04004e424cf66c0dadd509ac59c2 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection_2D_builtin.cl @@ -0,0 +1,34 @@ +/** + * 2D advection kernel. + * + * Computations are done using builtin functions on vector types. + * A local array is used un local memory as a cache for velocity. + * + * @param gvelo Velocity. + * @param ppos Particle position. + * @param dt Time step. + * @param min_position Domain lower point. + * @param dx Space step. + */ +__kernel void advection(__global const float* gvelo, + __global float* ppos, + float dt, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + int ind, i_ind, i_ind_p; + float invdx = 1.0/dx; + uint i; + float v, y, p; + for(i=gidX; i<WIDTH; i+=WGN) + { + v = gvelo[i+gidY*WIDTH]; + p = fma((float)(0.5*dt),v,i*dx); + ind = convert_int_rtn(p * invdx); + y = (fma(- convert_float(ind),dx,p))* invdx ; + i_ind = ((ind + WIDTH) % WIDTH); + i_ind_p = ((ind + 1) % WIDTH); + p=fma(mix(gvelo[i_ind+gidY*WIDTH],gvelo[i_ind_p+gidY*WIDTH],y),dt,i*dx + min_position); + ppos[i+gidY*WIDTH] = p; + } +} diff --git a/HySoP/hysop/gpu/cl_src/advection_2D_opt2_builtin.cl b/HySoP/hysop/gpu/cl_src/advection_2D_opt2_builtin.cl new file mode 100644 index 0000000000000000000000000000000000000000..d99fe53429fe274cca78682038c06150eda981b9 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection_2D_opt2_builtin.cl @@ -0,0 +1,49 @@ +/** + * 2D advection kernel. + * + * Memory reads and writes are performed using 2 element OpenCL vectors (float2 or double2). + * Computations are done using builtin functions on vector types. + * A local array is used un local memory as a cache for velocity. + * + * @param gvelo Velocity. + * @param ppos Particle position. + * @param dt Time step. + * @param min_position Domain lower point. + * @param dx Space step. + */ +__kernel void advection(__global const float* gvelo, + __global float* ppos, + float dt, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + int2 ind, i_ind, i_ind_p; + float invdx = 1.0/dx; + uint i; + float2 v, vp, y, p, coord; + __local float velocity_cache[WIDTH]; + + for(i=gidX*2; i<WIDTH; i+=WGN*2) + { + v = vload2((i+gidY*WIDTH)/2, gvelo); + velocity_cache[i] = v.x; + velocity_cache[i+1] = v.y; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + for(i=gidX*2; i<WIDTH; i+=WGN*2) + { + v = vload2(i/2, velocity_cache); + coord = (float2)(i*dx, (i+1)*dx); + p = fma((float2)(0.5*dt),v,coord); + ind = convert_int2_rtn(p * invdx); + y = (fma(- convert_float2(ind),dx,p))* invdx ; + i_ind = ((ind + WIDTH) % WIDTH); + i_ind_p = ((i_ind + 1) % WIDTH); + v = (float2)(velocity_cache[i_ind.x], velocity_cache[i_ind.y]); + vp = (float2)(velocity_cache[i_ind_p.x], velocity_cache[i_ind_p.y]); + p=fma(mix(v,vp,y),dt,coord + (float2)(min_position)); + vstore2(p, (i+gidY*WIDTH)/2, ppos); + } +} diff --git a/HySoP/hysop/gpu/cl_src/advection_2D_opt4_builtin.cl b/HySoP/hysop/gpu/cl_src/advection_2D_opt4_builtin.cl new file mode 100644 index 0000000000000000000000000000000000000000..c9d9637ad6728e8b5346532d706618100e647324 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection_2D_opt4_builtin.cl @@ -0,0 +1,53 @@ +/** + * 2D advection kernel. + * + * Memory reads and writes are performed using 4 element OpenCL vectors (float4 or double4). + * Computations are done using builtin functions on vector types. + * A local array is used un local memory as a cache for velocity. + * + * @param gvelo Velocity. + * @param ppos Particle position. + * @param dt Time step. + * @param min_position Domain lower point. + * @param dx Space step. + */ +__kernel void advection(__global const float* gvelo, + __global float* ppos, + float dt, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + int4 ind, i_ind, i_ind_p; + float invdx = 1.0/dx; + uint i; + float4 v, vp, y, p, coord; + __local float velocity_cache[WIDTH]; + + for(i=gidX*4; i<WIDTH; i+=WGN*4) + { + v = vload4((i+gidY*WIDTH)/4, gvelo); + velocity_cache[i] = v.x; + velocity_cache[i+1] = v.y; + velocity_cache[i+2] = v.z; + velocity_cache[i+3] = v.w; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + for(i=gidX*4; i<WIDTH; i+=WGN*4) + { + v = vload4(i/4, velocity_cache); + coord = (float4)(i*dx, (i+1)*dx, (i+2)*dx, (i+3)*dx); + p = fma((float4)(0.5*dt),v,coord); + ind = convert_int4_rtn(p * invdx); + y = (fma(- convert_float4(ind),dx,p))* invdx ; + i_ind = ((ind + WIDTH) % WIDTH); + i_ind_p = ((i_ind + 1) % WIDTH); + v = (float4)(velocity_cache[i_ind.x], velocity_cache[i_ind.y], + velocity_cache[i_ind.z], velocity_cache[i_ind.w]); + vp = (float4)(velocity_cache[i_ind_p.x], velocity_cache[i_ind_p.y], + velocity_cache[i_ind_p.z], velocity_cache[i_ind_p.w]); + p=fma(mix(v,vp,y),dt,coord + (float4)(min_position)); + vstore4(p, (i+gidY*WIDTH)/4, ppos); + } +} diff --git a/HySoP/hysop/gpu/cl_src/advection_2D_opt8_builtin.cl b/HySoP/hysop/gpu/cl_src/advection_2D_opt8_builtin.cl new file mode 100644 index 0000000000000000000000000000000000000000..cab7c1dd42d87d26fb21c864a80211d430d2a460 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection_2D_opt8_builtin.cl @@ -0,0 +1,76 @@ +/** + * 2D advection kernel. + * + * Memory reads and writes are performed using 8 element OpenCL vectors (float8 or double8). + * Computations are done using builtin functions on vector types. + * A local array is used un local memory as a cache for velocity. + * + * @param gvelo Velocity. + * @param ppos Particle position. + * @param dt Time step. + * @param min_position Domain lower point. + * @param dx Space step. + */ +__kernel void advection(__global const float* gvelo, + __global float* ppos, + float dt, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + int8 ind, i_ind, i_ind_p; + float invdx = 1.0/dx; + uint i; + float8 v, vp, y, p, coord; + __local float velocity_cache[WIDTH]; + + for(i=gidX*8; i<WIDTH; i+=WGN*8) + { + v = vload8((i+gidY*WIDTH)/8, gvelo); + velocity_cache[i] = v.s0; + velocity_cache[i+1] = v.s1; + velocity_cache[i+2] = v.s2; + velocity_cache[i+3] = v.s3; + velocity_cache[i+4] = v.s4; + velocity_cache[i+5] = v.s5; + velocity_cache[i+6] = v.s6; + velocity_cache[i+7] = v.s7; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + for(i=gidX*8; i<WIDTH; i+=WGN*8) + { + v = vload8(i/8, velocity_cache); + coord = (float8)(i*dx, + (i+1)*dx, + (i+2)*dx, + (i+3)*dx, + (i+4)*dx, + (i+5)*dx, + (i+6)*dx, + (i+7)*dx); + p = fma((float8)(0.5*dt),v,coord); + ind = convert_int8_rtn(p * invdx); + y = (fma(- convert_float8(ind),dx,p))* invdx ; + i_ind = ((ind + WIDTH) % WIDTH); + i_ind_p = ((i_ind + 1) % WIDTH); + v = (float8)(velocity_cache[i_ind.s0], + velocity_cache[i_ind.s1], + velocity_cache[i_ind.s2], + velocity_cache[i_ind.s3], + velocity_cache[i_ind.s4], + velocity_cache[i_ind.s5], + velocity_cache[i_ind.s6], + velocity_cache[i_ind.s7]); + vp = (float8)(velocity_cache[i_ind_p.s0], + velocity_cache[i_ind_p.s1], + velocity_cache[i_ind_p.s2], + velocity_cache[i_ind_p.s3], + velocity_cache[i_ind_p.s4], + velocity_cache[i_ind_p.s5], + velocity_cache[i_ind_p.s6], + velocity_cache[i_ind_p.s7]); + p = fma(mix(v,vp,y),dt,coord + (float8)(min_position)); + vstore8(p, (i+gidY*WIDTH)/8, ppos); + } +} diff --git a/HySoP/hysop/gpu/cl_src/advection_3D_builtin.cl b/HySoP/hysop/gpu/cl_src/advection_3D_builtin.cl new file mode 100644 index 0000000000000000000000000000000000000000..c54dd72679192fcca254c170d8669d3cbe80742f --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection_3D_builtin.cl @@ -0,0 +1,37 @@ +/** + * 3D advection kernel. + * + * Computations are done using builtin functions on vector types. + * A local array is used un local memory as a cache for velocity. + * + * @param gvelo Velocity. + * @param ppos Particle position. + * @param dt Time step. + * @param min_position Domain lower point. + * @param dx Space step. + */ +__kernel void advection(__global const float* gvelo, + __global float* ppos, + float dt, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + uint gidZ = get_global_id(2); + int ind, i_ind, i_ind_p; + float invdx = 1.0/dx; + uint i; + float v, y, p; + for(i=gidX; i<WIDTH; i+=WGN) + { + v = gvelo[i+gidY*WIDTH+gidZ*WIDTH*WIDTH]; + p = fma((float)(0.5*dt),v,i*dx); + ind = convert_int_rtn(p * invdx); + y = (fma(- convert_float(ind),dx,p))* invdx ; + i_ind = ((ind + WIDTH) % WIDTH); + i_ind_p = ((ind + 1) % WIDTH); + p=fma(mix(gvelo[i_ind+gidY*WIDTH+gidZ*WIDTH*WIDTH], + gvelo[i_ind_p+gidY*WIDTH+gidZ*WIDTH*WIDTH],y), + dt,i*dx + min_position); + ppos[i+gidY*WIDTH+gidZ*WIDTH*WIDTH] = p; + } +} diff --git a/HySoP/hysop/gpu/cl_src/advection_3D_opt2_builtin.cl b/HySoP/hysop/gpu/cl_src/advection_3D_opt2_builtin.cl new file mode 100644 index 0000000000000000000000000000000000000000..3891bb10dcdc5442cb14e4857f385eb4f4c7f4c8 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection_3D_opt2_builtin.cl @@ -0,0 +1,50 @@ +/** + * 3D advection kernel. + * + * Memory reads and writes are performed using 2 element OpenCL vectors (float2 or double2). + * Computations are done using builtin functions on vector types. + * A local array is used un local memory as a cache for velocity. + * + * @param gvelo Velocity. + * @param ppos Particle position. + * @param dt Time step. + * @param min_position Domain lower point. + * @param dx Space step. + */ +__kernel void advection(__global const float* gvelo, + __global float* ppos, + float dt, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + uint gidZ = get_global_id(2); + int2 ind, i_ind, i_ind_p; + float invdx = 1.0/dx; + uint i; + float2 v, vp, y, p, coord; + __local float velocity_cache[WIDTH]; + + for(i=gidX*2; i<WIDTH; i+=WGN*2) + { + v = vload2((i+gidY*WIDTH+gidZ*WIDTH*WIDTH)/2, gvelo); + velocity_cache[i] = v.x; + velocity_cache[i+1] = v.y; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + for(i=gidX*2; i<WIDTH; i+=WGN*2) + { + v = vload2(i/2, velocity_cache); + coord = (float2)(i*dx, (i+1)*dx); + p = fma((float2)(0.5*dt),v,coord); + ind = convert_int2_rtn(p * invdx); + y = (fma(- convert_float2(ind),dx,p))* invdx ; + i_ind = ((ind + WIDTH) % WIDTH); + i_ind_p = ((i_ind + 1) % WIDTH); + v = (float2)(velocity_cache[i_ind.x], velocity_cache[i_ind.y]); + vp = (float2)(velocity_cache[i_ind_p.x], velocity_cache[i_ind_p.y]); + p=fma(mix(v,vp,y),dt,coord + (float2)(min_position)); + vstore2(p, (i+gidY*WIDTH+gidZ*WIDTH*WIDTH)/2, ppos); + } +} diff --git a/HySoP/hysop/gpu/cl_src/advection_3D_opt4_builtin.cl b/HySoP/hysop/gpu/cl_src/advection_3D_opt4_builtin.cl new file mode 100644 index 0000000000000000000000000000000000000000..49ae8e1d4bb4ab796e0b659eaeadc7e8177bb0eb --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection_3D_opt4_builtin.cl @@ -0,0 +1,54 @@ +/** + * 3D advection kernel. + * + * Memory reads and writes are performed using 4 element OpenCL vectors (float4 or double4). + * Computations are done using builtin functions on vector types. + * A local array is used un local memory as a cache for velocity. + * + * @param gvelo Velocity. + * @param ppos Particle position. + * @param dt Time step. + * @param min_position Domain lower point. + * @param dx Space step. + */ +__kernel void advection(__global const float* gvelo, + __global float* ppos, + float dt, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + uint gidZ = get_global_id(2); + int4 ind, i_ind, i_ind_p; + float invdx = 1.0/dx; + uint i; + float4 v, vp, y, p, coord; + __local float velocity_cache[WIDTH]; + + for(i=gidX*4; i<WIDTH; i+=WGN*4) + { + v = vload4((i+gidY*WIDTH+gidZ*WIDTH*WIDTH)/4, gvelo); + velocity_cache[i] = v.x; + velocity_cache[i+1] = v.y; + velocity_cache[i+2] = v.z; + velocity_cache[i+3] = v.w; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + for(i=gidX*4; i<WIDTH; i+=WGN*4) + { + v = vload4(i/4, velocity_cache); + coord = (float4)(i*dx, (i+1)*dx, (i+2)*dx, (i+3)*dx); + p = fma((float4)(0.5*dt),v,coord); + ind = convert_int4_rtn(p * invdx); + y = (fma(- convert_float4(ind),dx,p))* invdx ; + i_ind = ((ind + WIDTH) % WIDTH); + i_ind_p = ((i_ind + 1) % WIDTH); + v = (float4)(velocity_cache[i_ind.x], velocity_cache[i_ind.y], + velocity_cache[i_ind.z], velocity_cache[i_ind.w]); + vp = (float4)(velocity_cache[i_ind_p.x], velocity_cache[i_ind_p.y], + velocity_cache[i_ind_p.z], velocity_cache[i_ind_p.w]); + p=fma(mix(v,vp,y),dt,coord + (float4)(min_position)); + vstore4(p, (i+gidY*WIDTH+gidZ*WIDTH*WIDTH)/4, ppos); + } +} diff --git a/HySoP/hysop/gpu/cl_src/advection_3D_opt8_builtin.cl b/HySoP/hysop/gpu/cl_src/advection_3D_opt8_builtin.cl new file mode 100644 index 0000000000000000000000000000000000000000..db3fb09a765970c6a15b7c3f43a99fc3e92c46bb --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection_3D_opt8_builtin.cl @@ -0,0 +1,77 @@ +/** + * 3D advection kernel. + * + * Memory reads and writes are performed using 8 element OpenCL vectors (float8 or double8). + * Computations are done using builtin functions on vector types. + * A local array is used un local memory as a cache for velocity. + * + * @param gvelo Velocity. + * @param ppos Particle position. + * @param dt Time step. + * @param min_position Domain lower point. + * @param dx Space step. + */ +__kernel void advection(__global const float* gvelo, + __global float* ppos, + float dt, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + uint gidZ = get_global_id(2); + int8 ind, i_ind, i_ind_p; + float invdx = 1.0/dx; + uint i; + float8 v, vp, y, p, coord; + __local float velocity_cache[WIDTH]; + + for(i=gidX*8; i<WIDTH; i+=WGN*8) + { + v = vload8((i+gidY*WIDTH+gidZ*WIDTH*WIDTH)/8, gvelo); + velocity_cache[i] = v.s0; + velocity_cache[i+1] = v.s1; + velocity_cache[i+2] = v.s2; + velocity_cache[i+3] = v.s3; + velocity_cache[i+4] = v.s4; + velocity_cache[i+5] = v.s5; + velocity_cache[i+6] = v.s6; + velocity_cache[i+7] = v.s7; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + for(i=gidX*8; i<WIDTH; i+=WGN*8) + { + v = vload8(i/8, velocity_cache); + coord = (float8)(i*dx, + (i+1)*dx, + (i+2)*dx, + (i+3)*dx, + (i+4)*dx, + (i+5)*dx, + (i+6)*dx, + (i+7)*dx); + p = fma((float8)(0.5*dt),v,coord); + ind = convert_int8_rtn(p * invdx); + y = (fma(- convert_float8(ind),dx,p))* invdx ; + i_ind = ((ind + WIDTH) % WIDTH); + i_ind_p = ((i_ind + 1) % WIDTH); + v = (float8)(velocity_cache[i_ind.s0], + velocity_cache[i_ind.s1], + velocity_cache[i_ind.s2], + velocity_cache[i_ind.s3], + velocity_cache[i_ind.s4], + velocity_cache[i_ind.s5], + velocity_cache[i_ind.s6], + velocity_cache[i_ind.s7]); + vp = (float8)(velocity_cache[i_ind_p.s0], + velocity_cache[i_ind_p.s1], + velocity_cache[i_ind_p.s2], + velocity_cache[i_ind_p.s3], + velocity_cache[i_ind_p.s4], + velocity_cache[i_ind_p.s5], + velocity_cache[i_ind_p.s6], + velocity_cache[i_ind_p.s7]); + p = fma(mix(v,vp,y),dt,coord + (float8)(min_position)); + vstore8(p, (i+gidY*WIDTH+gidZ*WIDTH*WIDTH)/8, ppos); + } +} diff --git a/HySoP/hysop/gpu/cl_src/advection_and_remesh_2D_opt4_builtin_noBC.cl b/HySoP/hysop/gpu/cl_src/advection_and_remesh_2D_opt4_builtin_noBC.cl new file mode 100644 index 0000000000000000000000000000000000000000..2eaec614a26d971b549d417d48d85a2ce4f5dd80 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection_and_remesh_2D_opt4_builtin_noBC.cl @@ -0,0 +1,95 @@ + +inline uint noBC_id(int id, int nb_part){ + return (id%nb_part)*WGN+(id/nb_part); +} +__kernel void advection_and_remeshing(__global const float* gvelo, + __global const float* pscal, + __global float* gscal, + float dt, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + float invdx = 1.0/dx; + int4 ind, i_ind, i_ind_p; + uint i, nb_part = WIDTH/WGN; + float4 p, s, y, gs, v, vp, coord; + uint4 index4; + + __local float gscal_loc[WIDTH]; + __local float gvelo_loc[WIDTH]; + + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + { + v = vload4((i+gidY*WIDTH)/4, gvelo); + gvelo_loc[noBC_id(i,nb_part)] = v.x; + gvelo_loc[noBC_id(i+1,nb_part)] = v.y; + gvelo_loc[noBC_id(i+2,nb_part)] = v.z; + gvelo_loc[noBC_id(i+3,nb_part)] = v.w; + gscal_loc[i] = 0.0; + gscal_loc[i+1] = 0.0; + gscal_loc[i+2] = 0.0; + gscal_loc[i+3] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*nb_part; i<(gidX + 1)*nb_part; i+=4) + { + v = (float4)(gvelo_loc[noBC_id(i,nb_part)], + gvelo_loc[noBC_id(i+1,nb_part)], + gvelo_loc[noBC_id(i+2,nb_part)], + gvelo_loc[noBC_id(i+3,nb_part)]); + coord = (float4)(i*dx, (i+1)*dx, (i+2)*dx, (i+3)*dx); + p = fma((float4)(0.5*dt),v,coord); + ind = convert_int4_rtn(p * invdx); + y = (fma(- convert_float4(ind),dx,p))* invdx ; + i_ind = ((ind + WIDTH) % WIDTH); + i_ind_p = ((i_ind + 1) % WIDTH); + v = (float4)(gvelo_loc[noBC_id(i_ind.x,nb_part)], gvelo_loc[noBC_id(i_ind.y,nb_part)], + gvelo_loc[noBC_id(i_ind.z,nb_part)], gvelo_loc[noBC_id(i_ind.w,nb_part)]); + vp = (float4)(gvelo_loc[noBC_id(i_ind_p.x,nb_part)], gvelo_loc[noBC_id(i_ind_p.y,nb_part)], + gvelo_loc[noBC_id(i_ind_p.z,nb_part)], gvelo_loc[noBC_id(i_ind_p.w,nb_part)]); + p=fma(mix(v,vp,y),dt,coord); + + s = vload4((i + gidY*WIDTH)/4, pscal); + ind = convert_int4_rtn(p * invdx); + y = (p - convert_float4(ind) * dx) * invdx; + index4 = convert_uint4((ind - 2 + WIDTH) % WIDTH); + gscal_loc[noBC_id(index4.x,nb_part)] += (alpha(y.x,s.x)); + gscal_loc[noBC_id(index4.y,nb_part)] += (alpha(y.y,s.y)); + gscal_loc[noBC_id(index4.z,nb_part)] += (alpha(y.z,s.z)); + gscal_loc[noBC_id(index4.w,nb_part)] += (alpha(y.w,s.w)); + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gscal_loc[noBC_id(index4.x,nb_part)] += (beta(y.x,s.x)); + gscal_loc[noBC_id(index4.y,nb_part)] += (beta(y.y,s.y)); + gscal_loc[noBC_id(index4.z,nb_part)] += (beta(y.z,s.z)); + gscal_loc[noBC_id(index4.w,nb_part)] += (beta(y.w,s.w)); + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gscal_loc[noBC_id(index4.x,nb_part)] += (gamma(y.x, s.x)); + gscal_loc[noBC_id(index4.y,nb_part)] += (gamma(y.y, s.y)); + gscal_loc[noBC_id(index4.z,nb_part)] += (gamma(y.z, s.z)); + gscal_loc[noBC_id(index4.w,nb_part)] += (gamma(y.w, s.w)); + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gscal_loc[noBC_id(index4.x,nb_part)] += (delta(y.x, s.x)); + gscal_loc[noBC_id(index4.y,nb_part)] += (delta(y.y, s.y)); + gscal_loc[noBC_id(index4.z,nb_part)] += (delta(y.z, s.z)); + gscal_loc[noBC_id(index4.w,nb_part)] += (delta(y.w, s.w)); + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gscal_loc[noBC_id(index4.x,nb_part)] += (eta(y.x, s.x)); + gscal_loc[noBC_id(index4.y,nb_part)] += (eta(y.y, s.y)); + gscal_loc[noBC_id(index4.z,nb_part)] += (eta(y.z, s.z)); + gscal_loc[noBC_id(index4.w,nb_part)] += (eta(y.w, s.w)); + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gscal_loc[noBC_id(index4.x,nb_part)] += (zeta(y.x, s.x)); + gscal_loc[noBC_id(index4.y,nb_part)] += (zeta(y.y, s.y)); + gscal_loc[noBC_id(index4.z,nb_part)] += (zeta(y.z, s.z)); + gscal_loc[noBC_id(index4.w,nb_part)] += (zeta(y.w, s.w)); + barrier(CLK_LOCAL_MEM_FENCE); + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + vstore4((float4)(gscal_loc[noBC_id(i,nb_part)],gscal_loc[noBC_id(i+1,nb_part)], gscal_loc[noBC_id(i+2,nb_part)], gscal_loc[noBC_id(i+3,nb_part)]), (i + gidY*WIDTH)/4, gscal); +} diff --git a/HySoP/hysop/gpu/cl_src/advection_and_remesh_2D_opt8_builtin_noBC.cl b/HySoP/hysop/gpu/cl_src/advection_and_remesh_2D_opt8_builtin_noBC.cl new file mode 100644 index 0000000000000000000000000000000000000000..289b9d4a6a0c4fc9bd2f84c09e96d294385485ed --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection_and_remesh_2D_opt8_builtin_noBC.cl @@ -0,0 +1,157 @@ + +inline uint noBC_id(int id, int nb_part){ + return (id%nb_part)*WGN+(id/nb_part); +} +__kernel void advection_and_remeshing(__global const float* gvelo, + __global const float* pscal, + __global float* gscal, + float dt,float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + float invdx = 1.0/dx; + int8 ind, i_ind, i_ind_p; + uint i, nb_part = WIDTH/WGN; + float8 p, s, y, gs, v, vp, coord; + uint8 index8; + + __local float gscal_loc[WIDTH]; + __local float gvelo_loc[WIDTH]; + + for(i=gidX*8; i<WIDTH; i+=(WGN*8)) + { + v = vload8((i+gidY*WIDTH)/8, gvelo); + gvelo_loc[noBC_id(i,nb_part)] = v.s0; + gvelo_loc[noBC_id(i+1,nb_part)] = v.s1; + gvelo_loc[noBC_id(i+2,nb_part)] = v.s2; + gvelo_loc[noBC_id(i+3,nb_part)] = v.s3; + gvelo_loc[noBC_id(i+4,nb_part)] = v.s4; + gvelo_loc[noBC_id(i+5,nb_part)] = v.s5; + gvelo_loc[noBC_id(i+6,nb_part)] = v.s6; + gvelo_loc[noBC_id(i+7,nb_part)] = v.s7; + gscal_loc[i] = 0.0; + gscal_loc[i+1] = 0.0; + gscal_loc[i+2] = 0.0; + gscal_loc[i+3] = 0.0; + gscal_loc[i+4] = 0.0; + gscal_loc[i+5] = 0.0; + gscal_loc[i+6] = 0.0; + gscal_loc[i+7] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*nb_part; i<(gidX + 1)*nb_part; i+=8) + { + v = (float8)(gvelo_loc[noBC_id(i,nb_part)], + gvelo_loc[noBC_id(i+1,nb_part)], + gvelo_loc[noBC_id(i+2,nb_part)], + gvelo_loc[noBC_id(i+3,nb_part)], + gvelo_loc[noBC_id(i+4,nb_part)], + gvelo_loc[noBC_id(i+5,nb_part)], + gvelo_loc[noBC_id(i+6,nb_part)], + gvelo_loc[noBC_id(i+7,nb_part)]); + coord = (float8)(i*dx, + (i+1)*dx, + (i+2)*dx, + (i+3)*dx, + (i+4)*dx, + (i+5)*dx, + (i+6)*dx, + (i+7)*dx); + p = fma((float8)(0.5*dt),v,coord); + ind = convert_int8_rtn(p * invdx); + y = (fma(- convert_float8(ind),dx,p))* invdx ; + i_ind = ((ind + WIDTH) % WIDTH); + i_ind_p = ((i_ind + 1) % WIDTH); + v = (float8)(gvelo_loc[noBC_id(i_ind.s0,nb_part)], + gvelo_loc[noBC_id(i_ind.s1,nb_part)], + gvelo_loc[noBC_id(i_ind.s2,nb_part)], + gvelo_loc[noBC_id(i_ind.s3,nb_part)], + gvelo_loc[noBC_id(i_ind.s4,nb_part)], + gvelo_loc[noBC_id(i_ind.s5,nb_part)], + gvelo_loc[noBC_id(i_ind.s6,nb_part)], + gvelo_loc[noBC_id(i_ind.s7,nb_part)]); + vp = (float8)(gvelo_loc[noBC_id(i_ind_p.s0,nb_part)], + gvelo_loc[noBC_id(i_ind_p.s1,nb_part)], + gvelo_loc[noBC_id(i_ind_p.s2,nb_part)], + gvelo_loc[noBC_id(i_ind_p.s3,nb_part)], + gvelo_loc[noBC_id(i_ind_p.s4,nb_part)], + gvelo_loc[noBC_id(i_ind_p.s5,nb_part)], + gvelo_loc[noBC_id(i_ind_p.s6,nb_part)], + gvelo_loc[noBC_id(i_ind_p.s7,nb_part)]); + p = fma(mix(v,vp,y),dt,coord); + + s = vload8((i + gidY*WIDTH)/8, pscal); + ind = convert_int8_rtn(p * invdx); + y = (p - convert_float8(ind) * dx) * invdx; + index8 = convert_uint8((ind - 2 + WIDTH) % WIDTH); + gscal_loc[noBC_id(index8.s0,nb_part)] += (alpha(y.s0,s.s0)); + gscal_loc[noBC_id(index8.s1,nb_part)] += (alpha(y.s1,s.s1)); + gscal_loc[noBC_id(index8.s2,nb_part)] += (alpha(y.s2,s.s2)); + gscal_loc[noBC_id(index8.s3,nb_part)] += (alpha(y.s3,s.s3)); + gscal_loc[noBC_id(index8.s4,nb_part)] += (alpha(y.s4,s.s4)); + gscal_loc[noBC_id(index8.s5,nb_part)] += (alpha(y.s5,s.s5)); + gscal_loc[noBC_id(index8.s6,nb_part)] += (alpha(y.s6,s.s6)); + gscal_loc[noBC_id(index8.s7,nb_part)] += (alpha(y.s7,s.s7)); + barrier(CLK_LOCAL_MEM_FENCE); + index8 = (index8 + 1) % WIDTH; + gscal_loc[noBC_id(index8.s0,nb_part)] += (beta(y.s0,s.s0)); + gscal_loc[noBC_id(index8.s1,nb_part)] += (beta(y.s1,s.s1)); + gscal_loc[noBC_id(index8.s2,nb_part)] += (beta(y.s2,s.s2)); + gscal_loc[noBC_id(index8.s3,nb_part)] += (beta(y.s3,s.s3)); + gscal_loc[noBC_id(index8.s4,nb_part)] += (beta(y.s4,s.s4)); + gscal_loc[noBC_id(index8.s5,nb_part)] += (beta(y.s5,s.s5)); + gscal_loc[noBC_id(index8.s6,nb_part)] += (beta(y.s6,s.s6)); + gscal_loc[noBC_id(index8.s7,nb_part)] += (beta(y.s7,s.s7)); + barrier(CLK_LOCAL_MEM_FENCE); + index8 = (index8 + 1) % WIDTH; + gscal_loc[noBC_id(index8.s0,nb_part)] += (gamma(y.s0,s.s0)); + gscal_loc[noBC_id(index8.s1,nb_part)] += (gamma(y.s1,s.s1)); + gscal_loc[noBC_id(index8.s2,nb_part)] += (gamma(y.s2,s.s2)); + gscal_loc[noBC_id(index8.s3,nb_part)] += (gamma(y.s3,s.s3)); + gscal_loc[noBC_id(index8.s4,nb_part)] += (gamma(y.s4,s.s4)); + gscal_loc[noBC_id(index8.s5,nb_part)] += (gamma(y.s5,s.s5)); + gscal_loc[noBC_id(index8.s6,nb_part)] += (gamma(y.s6,s.s6)); + gscal_loc[noBC_id(index8.s7,nb_part)] += (gamma(y.s7,s.s7)); + barrier(CLK_LOCAL_MEM_FENCE); + index8 = (index8 + 1) % WIDTH; + gscal_loc[noBC_id(index8.s0,nb_part)] += (delta(y.s0,s.s0)); + gscal_loc[noBC_id(index8.s1,nb_part)] += (delta(y.s1,s.s1)); + gscal_loc[noBC_id(index8.s2,nb_part)] += (delta(y.s2,s.s2)); + gscal_loc[noBC_id(index8.s3,nb_part)] += (delta(y.s3,s.s3)); + gscal_loc[noBC_id(index8.s4,nb_part)] += (delta(y.s4,s.s4)); + gscal_loc[noBC_id(index8.s5,nb_part)] += (delta(y.s5,s.s5)); + gscal_loc[noBC_id(index8.s6,nb_part)] += (delta(y.s6,s.s6)); + gscal_loc[noBC_id(index8.s7,nb_part)] += (delta(y.s7,s.s7)); + barrier(CLK_LOCAL_MEM_FENCE); + index8 = (index8 + 1) % WIDTH; + gscal_loc[noBC_id(index8.s0,nb_part)] += (eta(y.s0,s.s0)); + gscal_loc[noBC_id(index8.s1,nb_part)] += (eta(y.s1,s.s1)); + gscal_loc[noBC_id(index8.s2,nb_part)] += (eta(y.s2,s.s2)); + gscal_loc[noBC_id(index8.s3,nb_part)] += (eta(y.s3,s.s3)); + gscal_loc[noBC_id(index8.s4,nb_part)] += (eta(y.s4,s.s4)); + gscal_loc[noBC_id(index8.s5,nb_part)] += (eta(y.s5,s.s5)); + gscal_loc[noBC_id(index8.s6,nb_part)] += (eta(y.s6,s.s6)); + gscal_loc[noBC_id(index8.s7,nb_part)] += (eta(y.s7,s.s7)); + barrier(CLK_LOCAL_MEM_FENCE); + index8 = (index8 + 1) % WIDTH; + gscal_loc[noBC_id(index8.s0,nb_part)] += (zeta(y.s0,s.s0)); + gscal_loc[noBC_id(index8.s1,nb_part)] += (zeta(y.s1,s.s1)); + gscal_loc[noBC_id(index8.s2,nb_part)] += (zeta(y.s2,s.s2)); + gscal_loc[noBC_id(index8.s3,nb_part)] += (zeta(y.s3,s.s3)); + gscal_loc[noBC_id(index8.s4,nb_part)] += (zeta(y.s4,s.s4)); + gscal_loc[noBC_id(index8.s5,nb_part)] += (zeta(y.s5,s.s5)); + gscal_loc[noBC_id(index8.s6,nb_part)] += (zeta(y.s6,s.s6)); + gscal_loc[noBC_id(index8.s7,nb_part)] += (zeta(y.s7,s.s7)); + barrier(CLK_LOCAL_MEM_FENCE); + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*8; i<WIDTH; i+=(WGN*8)) + vstore8((float8)(gscal_loc[noBC_id(i,nb_part)], + gscal_loc[noBC_id(i+1,nb_part)], + gscal_loc[noBC_id(i+2,nb_part)], + gscal_loc[noBC_id(i+3,nb_part)], + gscal_loc[noBC_id(i+4,nb_part)], + gscal_loc[noBC_id(i+5,nb_part)], + gscal_loc[noBC_id(i+6,nb_part)], + gscal_loc[noBC_id(i+7,nb_part)]), (i + gidY*WIDTH)/8, gscal); +} diff --git a/HySoP/hysop/gpu/cl_src/advection_and_remesh_3D_opt4_builtin_private4.cl b/HySoP/hysop/gpu/cl_src/advection_and_remesh_3D_opt4_builtin_private4.cl new file mode 100644 index 0000000000000000000000000000000000000000..46e9bc59b2ea793d24cd36c429a41bb0e448ff8e --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection_and_remesh_3D_opt4_builtin_private4.cl @@ -0,0 +1,101 @@ + +__kernel void advection_and_remeshing(__global const float* gvelo, + __global const float* pscal, + __global float* gscal, + float dt, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + uint gidZ = get_global_id(2); + float invdx = 1.0/dx; + int4 ind, i_ind, i_ind_p; + uint i, nb_part = WIDTH/WGN; + float4 p, s, y, gs, v, vp, coord; + uint4 index4; + + __local float gscal_loc[WIDTH]; + __local float gvelo_loc[WIDTH]; + + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + { + v = vload4((i+gidY*WIDTH+gidZ*WIDTH*WIDTH)/4, gvelo); + gvelo_loc[i] = v.x; + gvelo_loc[i+1] = v.y; + gvelo_loc[i+2] = v.z; + gvelo_loc[i+3] = v.w; + gscal_loc[i] = 0.0; + gscal_loc[i+1] = 0.0; + gscal_loc[i+2] = 0.0; + gscal_loc[i+3] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*nb_part; i<(gidX + 1)*nb_part; i+=4) + { + v = vload4(i/4, gvelo_loc); + coord = (float4)(i*dx, (i+1)*dx, (i+2)*dx, (i+3)*dx); + p = fma((float4)(0.5*dt),v,coord); + ind = convert_int4_rtn(p * invdx); + y = (fma(- convert_float4(ind),dx,p))* invdx ; + i_ind = ((ind + WIDTH) % WIDTH); + i_ind_p = ((i_ind + 1) % WIDTH); + v = (float4)(gvelo_loc[i_ind.x], gvelo_loc[i_ind.y], + gvelo_loc[i_ind.z], gvelo_loc[i_ind.w]); + vp = (float4)(gvelo_loc[i_ind_p.x], gvelo_loc[i_ind_p.y], + gvelo_loc[i_ind_p.z], gvelo_loc[i_ind_p.w]); + p=fma(mix(v,vp,y),dt,coord); + + s = vload4((i + gidY*WIDTH+gidZ*WIDTH*WIDTH)/4, pscal); + ind = convert_int4_rtn(p * invdx); + y = (p - convert_float4(ind) * dx) * invdx; + index4 = convert_uint4((ind - 2 + WIDTH) % WIDTH); + barrier(CLK_LOCAL_MEM_FENCE); + gs = (alpha(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + //barrier(CLK_LOCAL_MEM_FENCE); + gs = (beta(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + //barrier(CLK_LOCAL_MEM_FENCE); + gs = (gamma(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + //barrier(CLK_LOCAL_MEM_FENCE); + gs = (delta(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + //barrier(CLK_LOCAL_MEM_FENCE); + gs = (eta(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + //barrier(CLK_LOCAL_MEM_FENCE); + gs = (zeta(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + vstore4((float4)(gscal_loc[i],gscal_loc[i+1], gscal_loc[i+2], gscal_loc[i+3]), (i + gidY*WIDTH+gidZ*WIDTH*WIDTH)/4, gscal); +} diff --git a/HySoP/hysop/gpu/cl_src/advection_and_remesh_3D_opt4_builtin_private4_noBC.cl b/HySoP/hysop/gpu/cl_src/advection_and_remesh_3D_opt4_builtin_private4_noBC.cl new file mode 100644 index 0000000000000000000000000000000000000000..5d221f67393d24f1f7614f422ef0396ad22d8c20 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection_and_remesh_3D_opt4_builtin_private4_noBC.cl @@ -0,0 +1,101 @@ + +inline uint noBC_id(int id, int nb_part){ + return (id%nb_part)*WGN+(id/nb_part); +} +__kernel void advection_and_remeshing(__global const float* gvelo, + __global const float* pscal, + __global float* gscal, + float dt, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + uint gidZ = get_global_id(2); + float invdx = 1.0/dx; + int4 ind, i_ind, i_ind_p; + uint i, nb_part = WIDTH/WGN; + float4 p, s, y, gs, v, vp, coord; + uint4 index4; + + __local float gscal_loc[WIDTH]; + __local float gvelo_loc[WIDTH]; + + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + { + v = vload4((i+gidY*WIDTH+gidZ*WIDTH*WIDTH)/4, gvelo); + gvelo_loc[noBC_id(i, nb_part)] = v.x; + gvelo_loc[noBC_id(i+1, nb_part)] = v.y; + gvelo_loc[noBC_id(i+2, nb_part)] = v.z; + gvelo_loc[noBC_id(i+3, nb_part)] = v.w; + gscal_loc[i] = 0.0; + gscal_loc[i+1] = 0.0; + gscal_loc[i+2] = 0.0; + gscal_loc[i+3] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*nb_part; i<(gidX + 1)*nb_part; i+=4) + { + v = (float4)(gvelo_loc[noBC_id(i,nb_part)], + gvelo_loc[noBC_id(i+1,nb_part)], + gvelo_loc[noBC_id(i+2,nb_part)], + gvelo_loc[noBC_id(i+3,nb_part)]); + coord = (float4)(i*dx, (i+1)*dx, (i+2)*dx, (i+3)*dx); + p = fma((float4)(0.5*dt),v,coord); + ind = convert_int4_rtn(p * invdx); + y = (fma(- convert_float4(ind),dx,p))* invdx ; + i_ind = ((ind + WIDTH) % WIDTH); + i_ind_p = ((i_ind + 1) % WIDTH); + v = (float4)(gvelo_loc[noBC_id(i_ind.x,nb_part)], gvelo_loc[noBC_id(i_ind.y,nb_part)], + gvelo_loc[noBC_id(i_ind.z,nb_part)], gvelo_loc[noBC_id(i_ind.w,nb_part)]); + vp = (float4)(gvelo_loc[noBC_id(i_ind_p.x,nb_part)], gvelo_loc[noBC_id(i_ind_p.y,nb_part)], + gvelo_loc[noBC_id(i_ind_p.z,nb_part)], gvelo_loc[noBC_id(i_ind_p.w,nb_part)]); + p=fma(mix(v,vp,y),dt,coord); + + s = vload4((i + gidY*WIDTH+gidZ*WIDTH*WIDTH)/4, pscal); + ind = convert_int4_rtn(p * invdx); + y = (p - convert_float4(ind) * dx) * invdx; + index4 = convert_uint4((ind - 2 + WIDTH) % WIDTH); + gs = (alpha(y, s)); + gscal_loc[noBC_id(index4.x, nb_part)] += gs.x; + gscal_loc[noBC_id(index4.y, nb_part)] += gs.y; + gscal_loc[noBC_id(index4.z, nb_part)] += gs.z; + gscal_loc[noBC_id(index4.w, nb_part)] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (beta(y, s)); + gscal_loc[noBC_id(index4.x, nb_part)] += gs.x; + gscal_loc[noBC_id(index4.y, nb_part)] += gs.y; + gscal_loc[noBC_id(index4.z, nb_part)] += gs.z; + gscal_loc[noBC_id(index4.w, nb_part)] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (gamma(y, s)); + gscal_loc[noBC_id(index4.x, nb_part)] += gs.x; + gscal_loc[noBC_id(index4.y, nb_part)] += gs.y; + gscal_loc[noBC_id(index4.z, nb_part)] += gs.z; + gscal_loc[noBC_id(index4.w, nb_part)] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (delta(y, s)); + gscal_loc[noBC_id(index4.x, nb_part)] += gs.x; + gscal_loc[noBC_id(index4.y, nb_part)] += gs.y; + gscal_loc[noBC_id(index4.z, nb_part)] += gs.z; + gscal_loc[noBC_id(index4.w, nb_part)] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (eta(y, s)); + gscal_loc[noBC_id(index4.x, nb_part)] += gs.x; + gscal_loc[noBC_id(index4.y, nb_part)] += gs.y; + gscal_loc[noBC_id(index4.z, nb_part)] += gs.z; + gscal_loc[noBC_id(index4.w, nb_part)] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (zeta(y, s)); + gscal_loc[noBC_id(index4.x, nb_part)] += gs.x; + gscal_loc[noBC_id(index4.y, nb_part)] += gs.y; + gscal_loc[noBC_id(index4.z, nb_part)] += gs.z; + gscal_loc[noBC_id(index4.w, nb_part)] += gs.w; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + vstore4((float4)(gscal_loc[noBC_id(i, nb_part)],gscal_loc[noBC_id(i+1, nb_part)], gscal_loc[noBC_id(i+2, nb_part)], gscal_loc[noBC_id(i+3, nb_part)]), (i + gidY*WIDTH+gidZ*WIDTH*WIDTH)/4, gscal); +} diff --git a/HySoP/hysop/gpu/cl_src/copy_2D.cl b/HySoP/hysop/gpu/cl_src/copy_2D.cl new file mode 100644 index 0000000000000000000000000000000000000000..1af25c6d58beb24ead4e25dfbd21cb78adc64613 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/copy_2D.cl @@ -0,0 +1,14 @@ + +__kernel void copy(__global const float* in, + __global float* out) +{ + uint xIndex = get_group_id(0) * TILE_DIM_COPY + get_local_id(0); + uint yIndex = get_group_id(1) * TILE_DIM_COPY + get_local_id(1); + uint zIndex = get_global_id(2); + uint index = xIndex + yIndex * WIDTH + zIndex*WIDTH*WIDTH; + + for(uint i=0; i<TILE_DIM_COPY; i+=BLOCK_ROWS_COPY) + { + out[index + i*WIDTH] = in[index + i*WIDTH]; + } +} diff --git a/HySoP/hysop/gpu/cl_src/copy_2D_opt2.cl b/HySoP/hysop/gpu/cl_src/copy_2D_opt2.cl new file mode 100644 index 0000000000000000000000000000000000000000..7c2f3a950f744724775bea503cf69131095c13b8 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/copy_2D_opt2.cl @@ -0,0 +1,17 @@ + +__kernel void copy(__global const float* in, + __global float* out) +{ + uint xIndex = (get_group_id(0) * TILE_DIM_COPY + get_local_id(0)*2); + uint yIndex = get_group_id(1) * TILE_DIM_COPY + get_local_id(1); + uint index = xIndex + yIndex * WIDTH; + float x,y; + + for(uint i=0; i<TILE_DIM_COPY; i+=BLOCK_ROWS_COPY) + { + x = in[index + i*WIDTH]; + y = in[index + 1 + i*WIDTH]; + out[index + i*WIDTH] = x; + out[index + 1 + i*WIDTH] = y; + } +} diff --git a/HySoP/hysop/gpu/cl_src/copy_3D_locMem.cl b/HySoP/hysop/gpu/cl_src/copy_3D_locMem.cl new file mode 100644 index 0000000000000000000000000000000000000000..b13c4b735ca5000ba8697cdfd43aa0cf024a789c --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/copy_3D_locMem.cl @@ -0,0 +1,21 @@ + +__kernel void copy(__global const float* in, + __global float* out) +{ + uint xIndex = get_group_id(0) * TILE_DIM_COPY + get_local_id(0); + uint yIndex = get_group_id(1) * TILE_DIM_COPY + get_local_id(1); + uint zIndex = get_global_id(2); + uint index = xIndex + yIndex * WIDTH + zIndex*WIDTH*WIDTH; + + __local float tile[TILE_DIM_COPY][TILE_DIM_COPY]; + + for(uint i=0; i<TILE_DIM_COPY; i+=BLOCK_ROWS_COPY) + { + tile[get_local_id(1)+i][get_local_id(0)] = in[index + i*WIDTH]; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(uint i=0; i<TILE_DIM_COPY; i+=BLOCK_ROWS_COPY) + { + out[index + i*WIDTH] = tile[get_local_id(1)+i][get_local_id(0)]; + } +} diff --git a/HySoP/hysop/gpu/cl_src/copy_3D_opt4.cl b/HySoP/hysop/gpu/cl_src/copy_3D_opt4.cl new file mode 100644 index 0000000000000000000000000000000000000000..e5bec1c04df53e5b901db18b6c959bbc4443fb55 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/copy_3D_opt4.cl @@ -0,0 +1,22 @@ + +__kernel void copy(__global const float* in, + __global float* out) +{ + uint xIndex = (get_group_id(0) * TILE_DIM_COPY + get_local_id(0)*4); + uint yIndex = get_group_id(1) * TILE_DIM_COPY + get_local_id(1); + uint zIndex = get_global_id(2); + uint index = xIndex + yIndex * WIDTH + zIndex*WIDTH*WIDTH; + float x,y,z,w; + + for(uint i=0; i<TILE_DIM_COPY; i+=BLOCK_ROWS_COPY) + { + x = in[index + i*WIDTH]; + y = in[index + 1 + i*WIDTH]; + z = in[index + 2 + i*WIDTH]; + w = in[index + 3 + i*WIDTH]; + out[index + i*WIDTH] = x; + out[index + 1 + i*WIDTH] = y; + out[index + 2 + i*WIDTH] = z; + out[index + 3 + i*WIDTH] = w; + } +} diff --git a/HySoP/hysop/gpu/cl_src/remeshing_2D_opt4_noBC.cl b/HySoP/hysop/gpu/cl_src/remeshing_2D_opt4_noBC.cl new file mode 100644 index 0000000000000000000000000000000000000000..895250723bdd1eaa04357817122fd5205eb10e1c --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/remeshing_2D_opt4_noBC.cl @@ -0,0 +1,90 @@ +/** + * Local memory as a 2D array to avoid bank conflicts. + * + * @param id Index 1D + * @param nb_part Particle number per work-item + * + * @return Index in a 2D space. + */ +inline uint noBC_id(int id, int nb_part){ + return (id%nb_part)*WGN+(id/nb_part); +} + +/** + * 2D Remeshing kernel. + * + * @param ppos Particle position + * @param pscal Particle scalar + * @param gscal Grid scalar + * @param min_position Domain lower point + * @param dx Space step + */ +__kernel void remeshing(__global const float* ppos, + __global const float* pscal, + __global float* gscal, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + float invdx = 1.0/dx; + int4 ind; + uint i, nb_part = WIDTH/WGN; + float4 p, s, y; + uint4 index4; + + __local float gscal_loc[WIDTH]; + + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + { + gscal_loc[i] = 0.0; + gscal_loc[i+1] = 0.0; + gscal_loc[i+2] = 0.0; + gscal_loc[i+3] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*nb_part; i<(gidX + 1)*nb_part; i+=4) + { + p = vload4((i + gidY*WIDTH)/4, ppos) - (float4)(min_position); + s = vload4((i + gidY*WIDTH)/4, pscal); + ind = convert_int4_rtn(p * invdx); + y = (p - convert_float4(ind) * dx) * invdx; + index4 = convert_uint4((ind - 2 + WIDTH) % WIDTH); + gscal_loc[noBC_id(index4.x,nb_part)] += (alpha(y.x,s.x)); + gscal_loc[noBC_id(index4.y,nb_part)] += (alpha(y.y,s.y)); + gscal_loc[noBC_id(index4.z,nb_part)] += (alpha(y.z,s.z)); + gscal_loc[noBC_id(index4.w,nb_part)] += (alpha(y.w,s.w)); + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gscal_loc[noBC_id(index4.x,nb_part)] += (beta(y.x,s.x)); + gscal_loc[noBC_id(index4.y,nb_part)] += (beta(y.y,s.y)); + gscal_loc[noBC_id(index4.z,nb_part)] += (beta(y.z,s.z)); + gscal_loc[noBC_id(index4.w,nb_part)] += (beta(y.w,s.w)); + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gscal_loc[noBC_id(index4.x,nb_part)] += (gamma(y.x, s.x)); + gscal_loc[noBC_id(index4.y,nb_part)] += (gamma(y.y, s.y)); + gscal_loc[noBC_id(index4.z,nb_part)] += (gamma(y.z, s.z)); + gscal_loc[noBC_id(index4.w,nb_part)] += (gamma(y.w, s.w)); + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gscal_loc[noBC_id(index4.x,nb_part)] += (delta(y.x, s.x)); + gscal_loc[noBC_id(index4.y,nb_part)] += (delta(y.y, s.y)); + gscal_loc[noBC_id(index4.z,nb_part)] += (delta(y.z, s.z)); + gscal_loc[noBC_id(index4.w,nb_part)] += (delta(y.w, s.w)); + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gscal_loc[noBC_id(index4.x,nb_part)] += (eta(y.x, s.x)); + gscal_loc[noBC_id(index4.y,nb_part)] += (eta(y.y, s.y)); + gscal_loc[noBC_id(index4.z,nb_part)] += (eta(y.z, s.z)); + gscal_loc[noBC_id(index4.w,nb_part)] += (eta(y.w, s.w)); + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gscal_loc[noBC_id(index4.x,nb_part)] += (zeta(y.x, s.x)); + gscal_loc[noBC_id(index4.y,nb_part)] += (zeta(y.y, s.y)); + gscal_loc[noBC_id(index4.z,nb_part)] += (zeta(y.z, s.z)); + gscal_loc[noBC_id(index4.w,nb_part)] += (zeta(y.w, s.w)); + barrier(CLK_LOCAL_MEM_FENCE); + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + vstore4((float4)(gscal_loc[noBC_id(i,nb_part)],gscal_loc[noBC_id(i+1,nb_part)], gscal_loc[noBC_id(i+2,nb_part)], gscal_loc[noBC_id(i+3,nb_part)]), (i + gidY*WIDTH)/4, gscal); +} diff --git a/HySoP/hysop/gpu/cl_src/remeshing_2D_opt4_private4.cl b/HySoP/hysop/gpu/cl_src/remeshing_2D_opt4_private4.cl new file mode 100644 index 0000000000000000000000000000000000000000..bd831d5abf3d791b984c67d3e4dee9d3b447f004 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/remeshing_2D_opt4_private4.cl @@ -0,0 +1,76 @@ + +__kernel void remeshing(__global const float* ppos, + __global const float* pscal, + __global float* gscal, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + float invdx = 1.0/dx; + int4 ind; + uint i, nb_part = WIDTH/WGN; + float4 p, s, y, gs; + uint4 index4; + + __local float gscal_loc[WIDTH]; + + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + { + gscal_loc[i] = 0.0; + gscal_loc[i+1] = 0.0; + gscal_loc[i+2] = 0.0; + gscal_loc[i+3] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*nb_part; i<(gidX + 1)*nb_part; i+=4) + { + p = vload4((i + gidY*WIDTH)/4, ppos) - (float4)(min_position); + s = vload4((i + gidY*WIDTH)/4, pscal); + ind = convert_int4_rtn(p * invdx); + y = (p - convert_float4(ind) * dx) * invdx; + index4 = convert_uint4((ind - 2 + WIDTH) % WIDTH); + gs = (alpha(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (beta(y, s)); + gscal_loc[index4.x]+= gs.x; + gscal_loc[index4.y]+= gs.y; + gscal_loc[index4.z]+= gs.z; + gscal_loc[index4.w]+= gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (gamma(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (delta(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (eta(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (zeta(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + vstore4((float4)(gscal_loc[i],gscal_loc[i+1], gscal_loc[i+2], gscal_loc[i+3]), (i + gidY*WIDTH)/4, gscal); +} diff --git a/HySoP/hysop/gpu/cl_src/remeshing_2D_opt4_private4_noBC.cl b/HySoP/hysop/gpu/cl_src/remeshing_2D_opt4_private4_noBC.cl new file mode 100644 index 0000000000000000000000000000000000000000..014edd31671f10731a58ba826b876bb5bf612fa1 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/remeshing_2D_opt4_private4_noBC.cl @@ -0,0 +1,79 @@ + +inline uint noBC_id(int id, int nb_part){ + return (id%nb_part)*WGN+(id/nb_part); +} +__kernel void remeshing(__global const float* ppos, + __global const float* pscal, + __global float* gscal, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + float invdx = 1.0/dx; + int4 ind; + uint i, nb_part = WIDTH/WGN; + float4 p, s, y, gs; + uint4 index4; + + __local float gscal_loc[WIDTH]; + + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + { + gscal_loc[i] = 0.0; + gscal_loc[i+1] = 0.0; + gscal_loc[i+2] = 0.0; + gscal_loc[i+3] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*nb_part; i<(gidX + 1)*nb_part; i+=4) + { + p = vload4((i + gidY*WIDTH)/4, ppos) - (float4)(min_position); + s = vload4((i + gidY*WIDTH)/4, pscal); + ind = convert_int4_rtn(p * invdx); + y = (p - convert_float4(ind) * dx) * invdx; + index4 = convert_uint4((ind - 2 + WIDTH) % WIDTH); + gs = (alpha(y, s)); + gscal_loc[noBC_id(index4.x, nb_part)] += gs.x; + gscal_loc[noBC_id(index4.y, nb_part)] += gs.y; + gscal_loc[noBC_id(index4.z, nb_part)] += gs.z; + gscal_loc[noBC_id(index4.w, nb_part)] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (beta(y, s)); + gscal_loc[noBC_id(index4.x, nb_part)] += gs.x; + gscal_loc[noBC_id(index4.y, nb_part)] += gs.y; + gscal_loc[noBC_id(index4.z, nb_part)] += gs.z; + gscal_loc[noBC_id(index4.w, nb_part)] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (gamma(y, s)); + gscal_loc[noBC_id(index4.x, nb_part)] += gs.x; + gscal_loc[noBC_id(index4.y, nb_part)] += gs.y; + gscal_loc[noBC_id(index4.z, nb_part)] += gs.z; + gscal_loc[noBC_id(index4.w, nb_part)] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (delta(y, s)); + gscal_loc[noBC_id(index4.x, nb_part)] += gs.x; + gscal_loc[noBC_id(index4.y, nb_part)] += gs.y; + gscal_loc[noBC_id(index4.z, nb_part)] += gs.z; + gscal_loc[noBC_id(index4.w, nb_part)] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (eta(y, s)); + gscal_loc[noBC_id(index4.x, nb_part)] += gs.x; + gscal_loc[noBC_id(index4.y, nb_part)] += gs.y; + gscal_loc[noBC_id(index4.z, nb_part)] += gs.z; + gscal_loc[noBC_id(index4.w, nb_part)] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (zeta(y, s)); + gscal_loc[noBC_id(index4.x, nb_part)] += gs.x; + gscal_loc[noBC_id(index4.y, nb_part)] += gs.y; + gscal_loc[noBC_id(index4.z, nb_part)] += gs.z; + gscal_loc[noBC_id(index4.w, nb_part)] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + vstore4((float4)(gscal_loc[noBC_id(i, nb_part)],gscal_loc[noBC_id(i+1, nb_part)], gscal_loc[noBC_id(i+2, nb_part)], gscal_loc[noBC_id(i+3, nb_part)]), (i + gidY*WIDTH)/4, gscal); +} diff --git a/HySoP/hysop/gpu/cl_src/remeshing_3D_opt4_private4.cl b/HySoP/hysop/gpu/cl_src/remeshing_3D_opt4_private4.cl new file mode 100644 index 0000000000000000000000000000000000000000..eb67791af74bdfb46e79c38c87fcc216d5fc6032 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/remeshing_3D_opt4_private4.cl @@ -0,0 +1,81 @@ + +__kernel void remeshing(__global const float* ppos, + __global const float* pscal, + __global float* gscal, float min_position, float dx) +{ + uint gidX = get_global_id(0); + uint gidY = get_global_id(1); + uint gidZ = get_global_id(2); + float invdx = 1.0/dx; + int4 ind; + uint i, nb_part = WIDTH/WGN; + float4 p, s, y, gs; + uint4 index4; + + __local float gscal_loc[WIDTH]; + + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + { + gscal_loc[i] = 0.0; + gscal_loc[i+1] = 0.0; + gscal_loc[i+2] = 0.0; + gscal_loc[i+3] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*nb_part; i<(gidX + 1)*nb_part; i+=4) + { + p = vload4((i + gidY*WIDTH + gidZ*WIDTH*WIDTH)/4, ppos) - (float4)(min_position); + s = vload4((i + gidY*WIDTH + gidZ*WIDTH*WIDTH)/4, pscal); + ind = convert_int4_rtn(p * invdx); + y = (p - convert_float4(ind) * dx) * invdx; + index4 = convert_uint4((ind - 2 + WIDTH) % WIDTH); + gs = (alpha(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (beta(y, s)); + gscal_loc[index4.x]+= gs.x; + gscal_loc[index4.y]+= gs.y; + gscal_loc[index4.z]+= gs.z; + gscal_loc[index4.w]+= gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (gamma(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (delta(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (eta(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + index4 = (index4 + 1) % WIDTH; + gs = (zeta(y, s)); + gscal_loc[index4.x] += gs.x; + gscal_loc[index4.y] += gs.y; + gscal_loc[index4.z] += gs.z; + gscal_loc[index4.w] += gs.w; + barrier(CLK_LOCAL_MEM_FENCE); + } + barrier(CLK_LOCAL_MEM_FENCE); + for(i=gidX*4; i<WIDTH; i+=(WGN*4)) + vstore4((float4)(gscal_loc[i], + gscal_loc[i+1], + gscal_loc[i+2], + gscal_loc[i+3]), + (i + gidY*WIDTH + gidZ*WIDTH*WIDTH)/4, gscal); +} diff --git a/HySoP/hysop/gpu/cl_src/transpose_2D_xy_coalesced_locPad_Diag_opt4.cl b/HySoP/hysop/gpu/cl_src/transpose_2D_xy_coalesced_locPad_Diag_opt4.cl new file mode 100644 index 0000000000000000000000000000000000000000..b6853e37ba37b97e35fbf8a7d224acfcbda7c5a2 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/transpose_2D_xy_coalesced_locPad_Diag_opt4.cl @@ -0,0 +1,36 @@ + +__kernel void transpose_xy(__global const float* in, + __global float* out) +{ + float4 temp; + uint group_id_x = get_group_id(0); + uint group_id_y = (get_group_id(0) + get_group_id(1)) % get_num_groups(0); + + uint xIndex = group_id_x * TILE_DIM_XY + get_local_id(0)*4; + uint yIndex = group_id_y * TILE_DIM_XY + get_local_id(1); + uint index_in = xIndex + yIndex * WIDTH; + + xIndex = group_id_y * TILE_DIM_XY + get_local_id(0)*4; + yIndex = group_id_x * TILE_DIM_XY + get_local_id(1); + uint index_out = xIndex + yIndex * WIDTH; + + __local float tile[TILE_DIM_XY][TILE_DIM_XY+1]; + + for(uint i=0; i<TILE_DIM_XY; i+=BLOCK_ROWS_XY) + { + temp = vload4((index_in + i*WIDTH)/4, in); + tile[get_local_id(1) + i][get_local_id(0)*4] = temp.x; + tile[get_local_id(1) + i][get_local_id(0)*4+1] = temp.y; + tile[get_local_id(1) + i][get_local_id(0)*4+2] = temp.z; + tile[get_local_id(1) + i][get_local_id(0)*4+3] = temp.w; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(uint i=0; i<TILE_DIM_XY; i+=BLOCK_ROWS_XY) + { + temp = (float4)(tile[get_local_id(0)*4][get_local_id(1) + i], + tile[get_local_id(0)*4+1][get_local_id(1) + i], + tile[get_local_id(0)*4+2][get_local_id(1) + i], + tile[get_local_id(0)*4+3][get_local_id(1) + i]); + vstore4(temp, (index_out + i*WIDTH)/4, out); + } +} diff --git a/HySoP/hysop/gpu/cl_src/transpose_3D_xy_coalesced_locPad_Diag_2Dslice_opt2.cl b/HySoP/hysop/gpu/cl_src/transpose_3D_xy_coalesced_locPad_Diag_2Dslice_opt2.cl new file mode 100644 index 0000000000000000000000000000000000000000..f94717ae5827d2b0bdd50d9d27da32336e935aef --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/transpose_3D_xy_coalesced_locPad_Diag_2Dslice_opt2.cl @@ -0,0 +1,33 @@ + +__kernel void transpose_xy(__global const float* in, + __global float* out) +{ + float2 temp; + uint group_id_x = get_group_id(0); + uint group_id_y = (get_group_id(0) + get_group_id(1)) % get_num_groups(0); + + uint xIndex = group_id_x * TILE_DIM_XY + get_local_id(0)*2; + uint yIndex = group_id_y * TILE_DIM_XY + get_local_id(1); + uint zIndex = get_global_id(2); + uint index_in = xIndex + yIndex * WIDTH + zIndex * WIDTH * WIDTH; + + xIndex = group_id_y * TILE_DIM_XY + get_local_id(0)*2; + yIndex = group_id_x * TILE_DIM_XY + get_local_id(1); + uint index_out = xIndex + yIndex * WIDTH + zIndex * WIDTH * WIDTH; + + __local float tile[TILE_DIM_XY][TILE_DIM_XY+1]; + + for(uint i=0; i<TILE_DIM_XY; i+=BLOCK_ROWS_XY) + { + temp = vload2((index_in + i*WIDTH)/2, in); + tile[get_local_id(1) + i][get_local_id(0)*2] = temp.x; + tile[get_local_id(1) + i][get_local_id(0)*2+1] = temp.y; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(uint i=0; i<TILE_DIM_XY; i+=BLOCK_ROWS_XY) + { + temp = (float2)(tile[get_local_id(0)*2][get_local_id(1) + i], + tile[get_local_id(0)*2+1][get_local_id(1) + i]); + vstore2(temp, (index_out + i*WIDTH)/2, out); + } +} diff --git a/HySoP/hysop/gpu/cl_src/transpose_3D_xy_coalesced_locPad_Diag_2Dslice_opt4.cl b/HySoP/hysop/gpu/cl_src/transpose_3D_xy_coalesced_locPad_Diag_2Dslice_opt4.cl new file mode 100644 index 0000000000000000000000000000000000000000..e8afa56b9744f41d70327adde3541c83d0f17806 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/transpose_3D_xy_coalesced_locPad_Diag_2Dslice_opt4.cl @@ -0,0 +1,37 @@ + +__kernel void transpose_xy(__global const float* in, + __global float* out) +{ + float4 temp; + uint group_id_x = get_group_id(0); + uint group_id_y = (get_group_id(0) + get_group_id(1)) % get_num_groups(0); + + uint xIndex = group_id_x * TILE_DIM_XY + get_local_id(0)*4; + uint yIndex = group_id_y * TILE_DIM_XY + get_local_id(1); + uint zIndex = get_global_id(2); + uint index_in = xIndex + yIndex * WIDTH + zIndex * WIDTH * WIDTH; + + xIndex = group_id_y * TILE_DIM_XY + get_local_id(0)*4; + yIndex = group_id_x * TILE_DIM_XY + get_local_id(1); + uint index_out = xIndex + yIndex * WIDTH + zIndex * WIDTH * WIDTH; + + __local float tile[TILE_DIM_XY][TILE_DIM_XY+1]; + + for(uint i=0; i<TILE_DIM_XY; i+=BLOCK_ROWS_XY) + { + temp = vload4((index_in + i*WIDTH)/4, in); + tile[get_local_id(1) + i][get_local_id(0)*4] = temp.x; + tile[get_local_id(1) + i][get_local_id(0)*4+1] = temp.y; + tile[get_local_id(1) + i][get_local_id(0)*4+2] = temp.z; + tile[get_local_id(1) + i][get_local_id(0)*4+3] = temp.w; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(uint i=0; i<TILE_DIM_XY; i+=BLOCK_ROWS_XY) + { + temp = (float4)(tile[get_local_id(0)*4][get_local_id(1) + i], + tile[get_local_id(0)*4+1][get_local_id(1) + i], + tile[get_local_id(0)*4+2][get_local_id(1) + i], + tile[get_local_id(0)*4+3][get_local_id(1) + i]); + vstore4(temp, (index_out + i*WIDTH)/4, out); + } +} diff --git a/HySoP/hysop/gpu/cl_src/transpose_3D_xy_coalesced_locPad_Diag_opt2.cl b/HySoP/hysop/gpu/cl_src/transpose_3D_xy_coalesced_locPad_Diag_opt2.cl new file mode 100644 index 0000000000000000000000000000000000000000..f94717ae5827d2b0bdd50d9d27da32336e935aef --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/transpose_3D_xy_coalesced_locPad_Diag_opt2.cl @@ -0,0 +1,33 @@ + +__kernel void transpose_xy(__global const float* in, + __global float* out) +{ + float2 temp; + uint group_id_x = get_group_id(0); + uint group_id_y = (get_group_id(0) + get_group_id(1)) % get_num_groups(0); + + uint xIndex = group_id_x * TILE_DIM_XY + get_local_id(0)*2; + uint yIndex = group_id_y * TILE_DIM_XY + get_local_id(1); + uint zIndex = get_global_id(2); + uint index_in = xIndex + yIndex * WIDTH + zIndex * WIDTH * WIDTH; + + xIndex = group_id_y * TILE_DIM_XY + get_local_id(0)*2; + yIndex = group_id_x * TILE_DIM_XY + get_local_id(1); + uint index_out = xIndex + yIndex * WIDTH + zIndex * WIDTH * WIDTH; + + __local float tile[TILE_DIM_XY][TILE_DIM_XY+1]; + + for(uint i=0; i<TILE_DIM_XY; i+=BLOCK_ROWS_XY) + { + temp = vload2((index_in + i*WIDTH)/2, in); + tile[get_local_id(1) + i][get_local_id(0)*2] = temp.x; + tile[get_local_id(1) + i][get_local_id(0)*2+1] = temp.y; + } + barrier(CLK_LOCAL_MEM_FENCE); + for(uint i=0; i<TILE_DIM_XY; i+=BLOCK_ROWS_XY) + { + temp = (float2)(tile[get_local_id(0)*2][get_local_id(1) + i], + tile[get_local_id(0)*2+1][get_local_id(1) + i]); + vstore2(temp, (index_out + i*WIDTH)/2, out); + } +} diff --git a/HySoP/hysop/gpu/cl_src/transpose_3D_xz_coalesced_Diag_bis_3DBlock.cl b/HySoP/hysop/gpu/cl_src/transpose_3D_xz_coalesced_Diag_bis_3DBlock.cl new file mode 100644 index 0000000000000000000000000000000000000000..67e8891bd6910bb0d2463ec367f10a11d8aa5ba8 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/transpose_3D_xz_coalesced_Diag_bis_3DBlock.cl @@ -0,0 +1,34 @@ + +__kernel void transpose_xz(__global const float* in, + __global float* out) +{ + uint group_id_x = get_group_id(0); + uint group_id_z = (get_group_id(0) + get_group_id(2)) % get_num_groups(0); + + uint xIndex = group_id_x * TILE_DIM_XZ + get_local_id(0); + uint yIndex = get_group_id(1) * TILE_DIM_XZ + get_local_id(1); + uint zIndex = group_id_z * TILE_DIM_XZ + get_local_id(2); + uint index_in = xIndex + yIndex * WIDTH + zIndex * WIDTH * WIDTH; + + xIndex = group_id_z * TILE_DIM_XZ + get_local_id(0); + zIndex = group_id_x * TILE_DIM_XZ + get_local_id(2); + uint index_out = xIndex + yIndex * WIDTH + zIndex * WIDTH * WIDTH; + + __local float tile[TILE_DIM_XZ][TILE_DIM_XZ][TILE_DIM_XZ]; + + for(uint j=0; j<TILE_DIM_XZ; j+=BLOCK_ROWS_XZ) + { + for(uint i=0; i<TILE_DIM_XZ; i+=BLOCK_ROWS_XZ) + { + tile[get_local_id(2) + j][get_local_id(1) + i][get_local_id(0)] = in[index_in + i*WIDTH + j*WIDTH*WIDTH]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + for(uint j=0; j<TILE_DIM_XZ; j+=BLOCK_ROWS_XZ) + { + for(uint i=0; i<TILE_DIM_XZ; i+=BLOCK_ROWS_XZ) + { + out[index_out + i*WIDTH + j*WIDTH*WIDTH] = tile[get_local_id(0)][get_local_id(1)+i][get_local_id(2) + j]; + } + } +} diff --git a/HySoP/hysop/gpu/cl_src/transpose_3D_xz_coalesced_locPad_Diag_bis_3DBlock.cl b/HySoP/hysop/gpu/cl_src/transpose_3D_xz_coalesced_locPad_Diag_bis_3DBlock.cl new file mode 100644 index 0000000000000000000000000000000000000000..a112ea1993e8885aa57c845d09120b466a57d21f --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/transpose_3D_xz_coalesced_locPad_Diag_bis_3DBlock.cl @@ -0,0 +1,34 @@ + +__kernel void transpose_xz(__global const float* in, + __global float* out) +{ + uint group_id_x = get_group_id(0); + uint group_id_z = (get_group_id(0) + get_group_id(2)) % get_num_groups(0); + + uint xIndex = group_id_x * TILE_DIM_XZ + get_local_id(0); + uint yIndex = get_group_id(1) * TILE_DIM_XZ + get_local_id(1); + uint zIndex = group_id_z * TILE_DIM_XZ + get_local_id(2); + uint index_in = xIndex + yIndex * WIDTH + zIndex * WIDTH * WIDTH; + + xIndex = group_id_z * TILE_DIM_XZ + get_local_id(0); + zIndex = group_id_x * TILE_DIM_XZ + get_local_id(2); + uint index_out = xIndex + yIndex * WIDTH + zIndex * WIDTH * WIDTH; + + __local float tile[TILE_DIM_XZ][TILE_DIM_XZ][TILE_DIM_XZ+1]; + + for(uint j=0; j<TILE_DIM_XZ; j+=BLOCK_ROWS_XZ) + { + for(uint i=0; i<TILE_DIM_XZ; i+=BLOCK_ROWS_XZ) + { + tile[get_local_id(2) + j][get_local_id(1) + i][get_local_id(0)] = in[index_in + i*WIDTH + j*WIDTH*WIDTH]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + for(uint j=0; j<TILE_DIM_XZ; j+=BLOCK_ROWS_XZ) + { + for(uint i=0; i<TILE_DIM_XZ; i+=BLOCK_ROWS_XZ) + { + out[index_out + i*WIDTH + j*WIDTH*WIDTH] = tile[get_local_id(0)][get_local_id(1)+i][get_local_id(2) + j]; + } + } +} diff --git a/HySoP/hysop/gpu/cl_src/weights_m6prime.cl b/HySoP/hysop/gpu/cl_src/weights_m6prime.cl new file mode 100644 index 0000000000000000000000000000000000000000..cd21a32e74bff85d463b68da55d44bd8f83749ee --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/weights_m6prime.cl @@ -0,0 +1,20 @@ +/** + * M6prime weights. + * + * @param y Distance between the particle and left-hand grid point + * @param s Scalar of the particle + * + * @return weights + */ +inline float alpha(float y, float s){ + return (y * (y * (y * (y * (13.0 - 5.0 * y) - 9.0) - 1.0) + 2.0) / 24.0) * s;} +inline float beta(float y, float s){ + return (y * (y * (y * (y * (25.0 * y - 64.0) + 39.0) + 16.0) - 16.0) / 24.0)*s;} +inline float gamma(float y, float s){ + return (y * y * (y * (y * (126.0 - 50.0 * y) - 70.0) - 30.0) / 24.0 + 1.0)*s;} +inline float delta(float y, float s){ + return (y * (y * (y * (y * (50.0 * y - 124.0) + 66.0) + 16.0) + 16.0) / 24.0)*s;} +inline float eta(float y, float s){ + return (y * (y * (y * (y * (61.0 - 25.0 * y) - 33.0) - 1.0) - 2.0) / 24.0)*s;} +inline float zeta(float y, float s){ + return (y * y * y * (y * (5.0 * y - 12.0) + 7.0) / 24.0)*s;} diff --git a/HySoP/hysop/gpu/cl_src/weights_m6prime_builtin.cl b/HySoP/hysop/gpu/cl_src/weights_m6prime_builtin.cl new file mode 100644 index 0000000000000000000000000000000000000000..f63a419523ab10048ed59ca85cbb25e3e1326da1 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/weights_m6prime_builtin.cl @@ -0,0 +1,13 @@ + +inline float alpha(float y, float s){ + return (y * fma(y , fma(y , fma(y , fma(-5.0, y, 13.0),- 9.0),- 1.0), 2.0) / 24.0) * s;} +inline float beta(float y, float s){ + return (y * fma(y , fma(y , fma(y , fma(25.0 , y ,- 64.0), 39.0) , 16.0), - 16.0) / 24.0)*s;} +inline float gamma(float y, float s){ + return (y * y * fma(y , fma(y , fma( - 50.0 , y, 126.0) ,- 70.0) ,- 30.0) / 24.0 + 1.0)*s;} +inline float delta(float y, float s){ + return (y * fma(y , fma(y, fma(y, fma(50.0, y, - 124.0), 66.0) , 16.0) , 16.0) / 24.0)*s;} +inline float eta(float y, float s){ + return (y * fma(y , fma(y , fma(y , fma(- 25.0 , y, 61.0), - 33.0), - 1.0), - 2.0) / 24.0)*s;} +inline float zeta(float y, float s){ + return (y * y * y * fma(y , fma(5.0 , y ,- 12.0) , 7.0) / 24.0)*s;} diff --git a/HySoP/hysop/gpu/cl_src/weights_m6prime_opt4.cl b/HySoP/hysop/gpu/cl_src/weights_m6prime_opt4.cl new file mode 100644 index 0000000000000000000000000000000000000000..865327f319211b18015e1ed556b7456c54b6cb7c --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/weights_m6prime_opt4.cl @@ -0,0 +1,13 @@ + +inline float4 alpha(float4 y, float4 s){ + return (y * (y * (y * (y * (13.0 - 5.0 * y) - 9.0) - 1.0) + 2.0) / 24.0) * s;} +inline float4 beta(float4 y, float4 s){ + return (y * (y * (y * (y * (25.0 * y - 64.0) + 39.0) + 16.0) - 16.0) / 24.0)*s;} +inline float4 gamma(float4 y, float4 s){ + return (y * y * (y * (y * (126.0 - 50.0 * y) - 70.0) - 30.0) / 24.0 + 1.0)*s;} +inline float4 delta(float4 y, float4 s){ + return (y * (y * (y * (y * (50.0 * y - 124.0) + 66.0) + 16.0) + 16.0) / 24.0)*s;} +inline float4 eta(float4 y, float4 s){ + return (y * (y * (y * (y * (61.0 - 25.0 * y) - 33.0) - 1.0) - 2.0) / 24.0)*s;} +inline float4 zeta(float4 y, float4 s){ + return (y * y * y * (y * (5.0 * y - 12.0) + 7.0) / 24.0)*s;} diff --git a/HySoP/hysop/gpu/cl_src/weights_m6prime_opt4_builtin.cl b/HySoP/hysop/gpu/cl_src/weights_m6prime_opt4_builtin.cl new file mode 100644 index 0000000000000000000000000000000000000000..3a4c2eaeb63ee4317914062ddfb07a0fd38bb6a3 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/weights_m6prime_opt4_builtin.cl @@ -0,0 +1,13 @@ + +inline float4 alpha(float4 y, float4 s){ + return (y * fma(y , fma(y , fma(y , fma(-5.0, y, 13.0),- 9.0),- 1.0), 2.0) / 24.0) * s;} +inline float4 beta(float4 y, float4 s){ + return (y * fma(y , fma(y , fma(y , fma(25.0 , y ,- 64.0), 39.0) , 16.0), - 16.0) / 24.0)*s;} +inline float4 gamma(float4 y, float4 s){ + return (y * y * fma(y , fma(y , fma( - 50.0 , y, 126.0) ,- 70.0) ,- 30.0) / 24.0 + 1.0)*s;} +inline float4 delta(float4 y, float4 s){ + return (y * fma(y , fma(y, fma(y, fma(50.0, y, - 124.0), 66.0) , 16.0) , 16.0) / 24.0)*s;} +inline float4 eta(float4 y, float4 s){ + return (y * fma(y , fma(y , fma(y , fma(- 25.0 , y, 61.0), - 33.0), - 1.0), - 2.0) / 24.0)*s;} +inline float4 zeta(float4 y, float4 s){ + return (y * y * y * fma(y , fma(5.0 , y ,- 12.0) , 7.0) / 24.0)*s;} diff --git a/HySoP/hysop/mpi/tests/test_topology.py b/HySoP/hysop/mpi/tests/test_topology.py index b805313e1be7cd9c346c0ef38b5f296518b06106..ebe9b25296a81131982d80c558f15b22ef6a41d9 100644 --- a/HySoP/hysop/mpi/tests/test_topology.py +++ b/HySoP/hysop/mpi/tests/test_topology.py @@ -53,6 +53,16 @@ def test_create_topology_with_dims(): assert testList.all() +def test_comparing_topologies(): + dom = pp.Box() + resolTopo = [33, 33, 17] + topoDims = [pp.mpi.main_size, 1] + topo = pp.topology.Cartesian.withResolution(dom, topoDims, resolTopo) + topo2 = pp.topology.Cartesian(pp.Box(), 2, resolTopo) + assert not topo == topo2 + topo2 = pp.topology.Cartesian(dom, 2, [11, 33, 17]) + assert not topo == topo2 + if __name__ == "__main__": #test_create_topology1D() diff --git a/HySoP/hysop/mpi/topology.py b/HySoP/hysop/mpi/topology.py index c55749959e7c45b63ab9ca987b2e79d40523fd18..4d0deea6c5e7fb85ad9df3da0eae699caad5b3c0 100644 --- a/HySoP/hysop/mpi/topology.py +++ b/HySoP/hysop/mpi/topology.py @@ -1,9 +1,11 @@ """ -@package parmepy.domain.topology +@package parmepy.mpi.topology MPI Topologies +@todo merge CartesianTopology and Topology """ from parmepy.constants import ORDER, np, PARMES_INTEGER, XDIR, YDIR, ZDIR +from parmepy.domain.mesh import LocalMesh, SubMesh from main_var import main_comm, MPI, main_size from itertools import count @@ -197,7 +199,7 @@ class Cartesian(object): ## if(domain.dimension > 2): ## self.south, self.north = self.topo.Shift(ZDIR, 1) ## self.neighbours[:, ZDIR] = self.south, self.north -## self.tabSort = np.array([[self.up,0] , [self.down,1] , [self.east,2], [self.west,3], +## self.tabSort = np.array([[self.up,0] , [self.down,1] , [self.east,2], [self.west,3], ## [self.north,4],[self.south,5]]) ## tabSort1 = self.tabSort[self.tabSort[:,0]<=self.rank] ## tabSort2 = self.tabSort[self.tabSort[:,0]>self.rank] @@ -319,6 +321,22 @@ class Cartesian(object): return cls(domain, cls.dim, globalMeshResolution, cutdir=cutdir, periods=periods, ghosts=ghosts) + def __eq__(self, other): + """ + Topology comparison method. Comparison based on: + @li globalMeshResolution + @li localMeshResolution + @li dims + @li domain + @todo completer le test dans parmepy/mpi/tests/test_topology.py + """ + return np.equal(self.globalMeshResolution, + other.globalMeshResolution).all() and \ + np.equal(self.mesh.resolution, other.mesh.resolution).all() and \ + np.equal(self.dims, other.dims).all() and \ + np.equal(self.ghosts, other.ghosts).all() and \ + self.domain == other.domain + def __str__(self): """ Topology info display """ s = '======== Topology id : ' + str(self.idTopo) + ' ========\n' @@ -335,113 +353,6 @@ class Cartesian(object): return s -class LocalMesh(object): - """ - Compute a local mesh resolution, using MPI process grid (topology) - and the global mesh resolution. - It also depends on ghost layer witdh and boundary conditions type. - """ - def __init__(self, rank, resolution, start, dom_size, dom_origin, ghosts=np.zeros((3))): - # Current process rank in the topology that owns this mesh - self.rank = rank - # Local resolution - self.resolution = resolution.copy() + 2 * ghosts - # index of the lower point (in each dir) in the global mesh - self.start = start # - ghosts - # index of the upper point (in each dir), global mesh - self.end = self.start + self.resolution - 1 - # Mesh step size - self.size = dom_size - # Mesh coordinates - coord_start = self.start * self.size + dom_origin - ghosts* self.size - self.origin = coord_start - coord_end = (self.end +1) * self.size + dom_origin - ghosts* self.size -# print 'Print TOPO' -# print 'resolution', self.resolution , 'shape', (coord_end-coord_start)/self.size, 'coord_end',coord_end, 'coord_start',coord_start - if self.start.size == 3: - self.coords = np.ogrid[coord_start[0]:coord_end[0]:self.size[0], - coord_start[1]:coord_end[1]:self.size[1], - coord_start[2]:coord_end[2]:self.size[2]] - elif self.start.size == 2: - self.coords = np.ogrid[coord_start[0]:coord_end[0]:self.size[0], - coord_start[1]:coord_end[1]:self.size[1]] - else: - self.coords = np.ogrid[coord_start[0]:coord_end[0]:self.size[0]] - - def __str__(self): - """ Local mesh display """ - s = '[' + str(self.rank) + '] Local mesh resolution :' \ - + str(self.resolution) + '\n' - - s += 'Global indices :' + str(self.start) + ' -- ' + str(self.end) + '\n' - return s - - -class SubMesh(object): - """ - Description of a local mesh, on one process of - the topology. - """ - def __init__(self, topo, g_start, resolution): - - ## Topology that creates (and owns) this mesh - self._topology = topo - ## Local resolution of this mesh, including ghost points - self.resolution = resolution - ## Dimension of the mesh - self.dim = self.resolution.size - ## index of the lowest point of this mesh - ## (in each dir) in the global mesh - self.global_start = g_start - ## index of the upper point (in each dir), global mesh - self.global_end = self.global_start + self.resolution - 1 - ## Mesh step size - self.space_step_size = topo.domain.length / \ - (topo.globalMeshResolution - 1) - ## Mesh local indices, only for "computed" points - ## (i.e. excluding ghosts) - self.local_start = topo.ghosts.copy() - self.local_end = self.resolution.copy() - topo.ghosts[:] - 1 - ## Coordinates of the "lowest" point of this mesh (including ghost) - ## Warning FP : this strongly depends on where is the origin - ## of the domain, of the type of boundary conditions - ## and if origin is on ghosts or "real" points. - self.origin = topo.domain.origin.copy() - self.origin[:] += self.space_step_size * \ - (self.global_start[:] - topo.ghosts[:]) - - self.end = self.origin + self.space_step_size * (self.resolution - 1) - if self.dim == 1: - self.coords = np.linspace(self.origin, self.end, self.resolution) - - elif self.dim == 2: - cx = np.linspace(self.origin[0], self.end[0], - self.resolution[0])[:, np.newaxis] - cy = np.linspace(self.origin[1], self.end[1], - self.resolution[1])[np.newaxis, :] - self.coords = tuple([cx, cy]) - - elif self.dim == 3: - cx = np.linspace(self.origin[0], self.end[0], - self.resolution[0])[:, np.newaxis, np.newaxis] - cy = np.linspace(self.origin[1], self.end[1], - self.resolution[1])[np.newaxis, :, np.newaxis] - cz = np.linspace(self.origin[2], self.end[2], - self.resolution[2])[np.newaxis, np.newaxis, :] - self.coords = tuple([cx, cy, cz]) - - def __str__(self): - """ Sub mesh display """ - s = 'Coords:' + str(self._topology.coords[:]) - s += ' Sub mesh resolution:' + str(self.resolution) + '\n' - s += 'Starting point global indices :' + str(self.global_start) + '\n' - s += 'Local indices for "computed" points (excluding ghosts) :\n' - s += ' start = ' + str(self.local_start) - s += ', end = ' + str(self.local_end) + '\n' - return s - - if __name__ == "__main__": print "This module defines the following classes:" print "- CartesianTopology: ", CartesianTopology.__doc__ - print "- LocalMesh: ", LocalMesh.__doc__ diff --git a/HySoP/hysop/numerics/__init__.py b/HySoP/hysop/numerics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3ae374d3bc7dc977e39288ab8c6dd0930c993659 --- /dev/null +++ b/HySoP/hysop/numerics/__init__.py @@ -0,0 +1,5 @@ +""" +@package parmepy.fields + +Everything concerning Fields. +""" diff --git a/HySoP/hysop/numerics/gpu_kernel.py b/HySoP/hysop/numerics/gpu_kernel.py new file mode 100644 index 0000000000000000000000000000000000000000..144e520ec5fefd2830c889317020c3a77d6cb7b5 --- /dev/null +++ b/HySoP/hysop/numerics/gpu_kernel.py @@ -0,0 +1,119 @@ +""" +@package parmepy.numerics.gpu_kernel +""" +from parmepy.numerics.method import NumMethod +from parmepy.gpu import cl + + +class KernelListLauncher: + """ + OpenCL kernel list launcher. + + Manage launching of OpenCL kernels as a list. + """ + def __init__(self, kernel, queue, gsize, lsize=None): + """ + Create a kernel list launcher. + @param kernel : kernel list. + @param queue : OpenCL command queue. + @param gsize : OpenCL global size index. + @param lsize : OpenCL local size index. + """ + ## OpenCL Kernel list + self.kernel = kernel + print [k.function_name for k in self.kernel] + ## OpenCL command queue + self.queue = queue + ## OpenCL global size index. + self.global_size = gsize + ## OpenCL local size index. + self.local_size = lsize + self.call_number = [0 for k in self.kernel] + + def launch(self, d, *args): + """ + Launch a kernel. + + @param d : kernel index in kernel list. + @param args : kernel arguments. + @return OpenCL Event + + OpenCL global size and local sizes are not given in args. Class member are used. + """ + return KernelListLauncher.launch_sizes_in_args(self, d, self.global_size[d], self.local_size[d], *args) + + def launch_sizes_in_args(self, d, *args): + """ + Launch a kernel. + + @param d : kernel index in kernel list. + @param args : kernel arguments. + @return OpenCL Event. + + Opencl global and local sizes are given in args. + """ + #print self.kernel[d].function_name, args[0], args[1] + self.call_number[d] += 1 + evt = self.kernel[d](self.queue, *args) + return evt + + def finish(self): + self.queue.finish() + + def function_name(self, d=None): + """Prints OpenCL Kernels function names informations""" + if d is not None: + return self.kernel[d].get_info(cl.kernel_info.FUNCTION_NAME) + else: + return [k.get_info(cl.kernel_info.FUNCTION_NAME) for k in self.kernel] + + +class KernelLauncher(KernelListLauncher): + """ + OpenCL kernel launcher. + + Manage launching of one OpenCL kernel as a KernelListLauncher with a list of one kernel. + """ + def __init__(self, kernel, queue, gsize, lsize): + """ + Create a KernelLauncher. + + @param kernel : kernel. + @param queue : OpenCL command queue. + @param gsize : OpenCL global size index. + @param lsize : OpenCL local size index. + + Create a KernelListLauncher with a list of one kernel. + """ + KernelListLauncher.__init__(self, [kernel], queue, [gsize], [lsize]) + + def launch_sizes_in_args(self, *args): + """ + Launch the kernel. + + @param args : kernel arguments. + @return OpenCL Event. + + Opencl global and local sizes are given in args. + """ + return KernelListLauncher.launch_sizes_in_args(self, 0, *args) + + def finish(self): + """Wrapping OpenCL queue.finish() method.""" + self.queue.finish() + + def launch(self, *args): + """ + Launch the kernel. + + @param args : kernel arguments. + @return OpenCL Event + + OpenCL global size and local sizes are not given in args. Class member are used. + """ + return KernelListLauncher.launch(self, 0, *args) + + def function_name(self): + """Prints OpenCL Kernel function name informations""" + res = KernelListLauncher.function_name(self, 0) + return res diff --git a/HySoP/hysop/integrator/__init__.py b/HySoP/hysop/numerics/integrators/__init__.py similarity index 100% rename from HySoP/hysop/integrator/__init__.py rename to HySoP/hysop/numerics/integrators/__init__.py diff --git a/HySoP/hysop/integrator/euler.py b/HySoP/hysop/numerics/integrators/euler.py similarity index 89% rename from HySoP/hysop/integrator/euler.py rename to HySoP/hysop/numerics/integrators/euler.py index e14fbbf1790edf11dde0a8360910e2be0f65403e..fffd7f60c4de738849030cd302d2c3f31730a114 100755 --- a/HySoP/hysop/integrator/euler.py +++ b/HySoP/hysop/numerics/integrators/euler.py @@ -4,9 +4,8 @@ Forward Euler method. """ -from parmepy.constants import PARMES_REAL, ORDER -from ode_solver import ODESolver -import numpy as np +from parmepy.constants import np, PARMES_REAL, ORDER +from parmepy.numerics.integrators.odesolver import ODESolver class Euler(ODESolver): @@ -27,6 +26,7 @@ class Euler(ODESolver): @param conditions : function to apply boundary conditions. """ ODESolver.__init__(self, f) + self.name = 'euler' def integrate(self, f, t, dt, y): """ diff --git a/HySoP/hysop/integrator/ode_solver.py b/HySoP/hysop/numerics/integrators/odesolver.py similarity index 79% rename from HySoP/hysop/integrator/ode_solver.py rename to HySoP/hysop/numerics/integrators/odesolver.py index 33125ba11320c8b9262ba16cca4ceebe3c796999..0b59a94b4625f119f6272de889b98cc811bdbcea 100644 --- a/HySoP/hysop/integrator/ode_solver.py +++ b/HySoP/hysop/numerics/integrators/odesolver.py @@ -1,14 +1,14 @@ -# -*- coding: utf-8 -*- """ -@package parmepy.integrator.ode_solver +@package parmepy.numerics.integrators.odesolver Classes to describe (time) integrators. """ from abc import ABCMeta, abstractmethod -import numpy as np +from parmepy.constants import np +from parmepy.numerics.method import NumMethod -class ODESolver: +class ODESolver(NumMethod): """ Abstract description for ODE solvers. Solve the system : @@ -26,6 +26,7 @@ class ODESolver: @param f function f(t,y) : Right hand side of the equation to solve. """ + super(ODESolver, self).__init__() ## RHS. if f is not None: self.f = f @@ -34,7 +35,7 @@ class ODESolver: self.f = lambda t, y: np.vectorize(f)(t, y) @abstractmethod - def integrate(self, y, t, dt): + def __call__(self, y, t, dt): """ Abstract method, apply operaton on a variable. Must be implemented by sub-class. @@ -44,8 +45,13 @@ class ODESolver: @param dt : time step. @return y at t+dt. """ + pass if __name__ == "__main__": print __doc__ print "- Provided class : ODESolver (abstract)" print ODESolver.__doc__ + + + + diff --git a/HySoP/hysop/integrator/runge_kutta2.py b/HySoP/hysop/numerics/integrators/runge_kutta2.py similarity index 84% rename from HySoP/hysop/integrator/runge_kutta2.py rename to HySoP/hysop/numerics/integrators/runge_kutta2.py index 415bceba6bb6ad530c85356e26b6d450efad6c92..19f0c80050f0f0b2f958b515518a4a9d0456d64a 100755 --- a/HySoP/hysop/integrator/runge_kutta2.py +++ b/HySoP/hysop/numerics/integrators/runge_kutta2.py @@ -1,12 +1,10 @@ -# -*- coding: utf-8 -*- """ -@package ode_solver.runge_kutta2 +@package parmepy.numerics.integrators.runge_kutta2 RK2 method interface. """ -from parmepy.constants import PARMES_REAL, ORDER -from ode_solver import ODESolver -import numpy as np +from parmepy.constants import np, PARMES_REAL, ORDER +from parmepy.numerics.integrators.odesolver import ODESolver class RK2(ODESolver): @@ -25,8 +23,9 @@ class RK2(ODESolver): @param dim : dimensions """ ODESolver.__init__(self, f) + self.name = 'RK2' - def integrate(self, f, t, dt, y): + def __call__(self, f, t, dt, y): """ Integration step for RK2 Method. diff --git a/HySoP/hysop/integrator/runge_kutta3.py b/HySoP/hysop/numerics/integrators/runge_kutta3.py similarity index 89% rename from HySoP/hysop/integrator/runge_kutta3.py rename to HySoP/hysop/numerics/integrators/runge_kutta3.py index be6de763156c66d86f1c2de66f56493deda9a31d..32d3b6aaef4907b7b10f091d82f12cdd99311d32 100755 --- a/HySoP/hysop/integrator/runge_kutta3.py +++ b/HySoP/hysop/numerics/integrators/runge_kutta3.py @@ -1,12 +1,10 @@ -# -*- coding: utf-8 -*- """ @package ode_solver.runge_kutta3 RK3 method interface. """ -from parmepy.constants import PARMES_REAL, ORDER -from ode_solver import ODESolver -import numpy as np +from parmepy.constants import np, PARMES_REAL, ORDER +from parmepy.numerics.integrators.odesolver import ODESolver class RK3(ODESolver): @@ -26,8 +24,9 @@ class RK3(ODESolver): @param conditions : function to apply boundary conditions. """ ODESolver.__init__(self, f) + self.name = 'RK3' - def integrate(self, f, t, dt, y): + def __call__(self, f, t, dt, y): """ Integration step for RK2 Method. up = f[t, Y(t,space)]. diff --git a/HySoP/hysop/integrator/runge_kutta4.py b/HySoP/hysop/numerics/integrators/runge_kutta4.py similarity index 90% rename from HySoP/hysop/integrator/runge_kutta4.py rename to HySoP/hysop/numerics/integrators/runge_kutta4.py index 24cd48e865452fb9edf3deaa0b1df3f8416b1f9b..515168dde2e76524c75fbcbe493dd91e7cd63620 100755 --- a/HySoP/hysop/integrator/runge_kutta4.py +++ b/HySoP/hysop/numerics/integrators/runge_kutta4.py @@ -4,9 +4,8 @@ RK4 method interface. """ -from parmepy.constants import PARMES_REAL, ORDER -from ode_solver import ODESolver -import numpy as np +from parmepy.constants import np, PARMES_REAL, ORDER +from parmepy.numerics.integrators.odesolver import ODESolver class RK4(ODESolver): @@ -26,9 +25,9 @@ class RK4(ODESolver): @param conditions : function to apply boundary conditions. """ ODESolver.__init__(self, f) + self.name = 'RK4' - - def integrate(self, f, t, dt, y): + def __call__(self, f, t, dt, y): """ Integration step for RK2 Method. up = f[t, Y(t,space)]. diff --git a/HySoP/hysop/numerics/interpolation.py b/HySoP/hysop/numerics/interpolation.py new file mode 100644 index 0000000000000000000000000000000000000000..06acbd1091f21a9d01f6630be05b43c737b57dd6 --- /dev/null +++ b/HySoP/hysop/numerics/interpolation.py @@ -0,0 +1,26 @@ +""" +@package parmepy.numerics.interpolators.interpolation +""" +from parmepy.constants import np, PARMES_REAL, ORDER +from parmepy.numerics.method import NumMethod + + +class Linear(NumMethod): + """Remshing with M6prime function""" + + def __init__(self, f): + NumMethod.__init__(self) + self.name = 'LinearInterpolation' + self.field = f + + def __call__(self, x, y): + + """ + Remesh particles at position ppos with scalar + pscal along direction d. + + @param ppos : particle position + @param pscal : particle scalar + @param d : splitting direction + """ + raise ValueError("Not implemented") diff --git a/HySoP/hysop/numerics/method.py b/HySoP/hysop/numerics/method.py new file mode 100644 index 0000000000000000000000000000000000000000..bfe58adc62d97ed035d8d925afa19757dd7ab671 --- /dev/null +++ b/HySoP/hysop/numerics/method.py @@ -0,0 +1,22 @@ +""" +@package parmepy.numerics.method + +Numerical method interface. +""" +from abc import ABCMeta, abstractmethod + + +class NumMethod(object): + """Abstract description of numerical method. """ + + __metaclass__ = ABCMeta + + @abstractmethod + def __init__(self): + """ Creates a numerical method.""" + self.name = '' + + @abstractmethod + def __call__(self): + """Call the method""" + pass diff --git a/HySoP/hysop/numerics/remeshing/__init__.py b/HySoP/hysop/numerics/remeshing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3ae374d3bc7dc977e39288ab8c6dd0930c993659 --- /dev/null +++ b/HySoP/hysop/numerics/remeshing/__init__.py @@ -0,0 +1,5 @@ +""" +@package parmepy.fields + +Everything concerning Fields. +""" diff --git a/HySoP/hysop/numerics/remeshing/m6prime.py b/HySoP/hysop/numerics/remeshing/m6prime.py new file mode 100644 index 0000000000000000000000000000000000000000..17e1e928b6fb6ee19e6eec6d8e30bf34ccc07570 --- /dev/null +++ b/HySoP/hysop/numerics/remeshing/m6prime.py @@ -0,0 +1,80 @@ +""" +@package parmepy.numerics.remeshing +""" +from parmepy.constants import np, PARMES_REAL, ORDER +from parmepy.numerics.remeshing.remeshing import Remeshing + + +class M6Prime(Remeshing): + """Remshing with M6prime function""" + + def __init__(self, dim): + Remeshing.__init__(self, dim) + self.name = 'M6Prime' + self._alpha = lambda y, s: s * y * ( + y * (y * (y * (13. - 5. * y) - 9.) - 1.) + 2.) / 24. + self._beta = lambda y, s: s * y * ( + y * (y * (y * (25. * y - 64.) + 39.) + 16.) - 16.) / 24. + self._gamma = lambda y, s: s * ( + y * y * (y * (y * (126. - 50. * y) - 70.) - 30.) / 24. + 1.) + self._delta = lambda y, s: s * y * ( + y * (y * (y * (50. * y - 124.) + 66.) + 16.) + 16.) / 24. + self._zeta = lambda y, s: s * y * ( + y * (y * (y * (61. - 25. * y) - 33.) - 1.) - 2.) / 24. + self._eta = lambda y, s: s * y * y * y * ( + y * (5. * y - 12.) + 7.) / 24. + + def __call__(self, ppos, pscal, d): + + """ + Remesh particles at position ppos with scalar + pscal along direction d. + + @param ppos : particle position + @param pscal : particle scalar + @param d : splitting direction + """ + size = ppos.shape + row, col = np.indices(size) + row_i = np.array(row) + floor = np.floor(ppos * size[d]) + y = np.array(ppos * size[d] - floor, dtype=PARMES_REAL, order=ORDER) + result = np.zeros(size, dtype=PARMES_REAL, order=ORDER) + ind = floor.astype(np.int32) % size[d] + # alpha + row = (ind - 2) % size[d] + for i in xrange(size[d]): + sl = self.slice_i_along_d(i, d) + result[row[sl], col[sl]] = result[row[sl], col[sl]] +\ + self._alpha(y[sl], pscal[sl]) + # beta + row = (ind - 1) % size[d] + for i in xrange(size[d]): + sl = self.slice_i_along_d(i, d) + result[row[sl], col[sl]] = result[row[sl], col[sl]] +\ + self._beta(y[sl], pscal[sl]) + # gamma + row = (ind) % size[d] + for i in xrange(size[d]): + sl = self.slice_i_along_d(i, d) + result[row[sl], col[sl]] = result[row[sl], col[sl]] +\ + self._gamma(y[sl], pscal[sl]) + # delta + row = (ind + 1) % size[d] + for i in xrange(size[d]): + sl = self.slice_i_along_d(i, d) + result[row[sl], col[sl]] = result[row[sl], col[sl]] +\ + self._delta(y[sl], pscal[sl]) + # zeta + row = (ind + 2) % size[d] + for i in xrange(size[d]): + sl = self.slice_i_along_d(i, d) + result[row[sl], col[sl]] = result[row[sl], col[sl]] +\ + self._zeta(y[sl], pscal[sl]) + # eta + row = (ind + 3) % size[d] + for i in xrange(size[d]): + sl = self.slice_i_along_d(i, d) + result[row[sl], col[sl]] = result[row[sl], col[sl]] +\ + self._eta(y[sl], pscal[sl]) + return result diff --git a/HySoP/hysop/numerics/remeshing/m8prime.py b/HySoP/hysop/numerics/remeshing/m8prime.py new file mode 100644 index 0000000000000000000000000000000000000000..f07b152c14d923a6094e4435c81774b438d9b205 --- /dev/null +++ b/HySoP/hysop/numerics/remeshing/m8prime.py @@ -0,0 +1,81 @@ +""" +@package parmepy.numerics.remeshing +""" +from parmepy.constants import np, PARMES_REAL, ORDER +from parmepy.numerics.remeshing.remeshing import Remeshing + + +class M6Prime(Remeshing): + """Remshing with M6prime function""" + + def __init__(self, dim): + Remeshing.__init__(self, dim) + self.name = 'M8Prime' + raise ValueError("Not implemented") + self._alpha = lambda y, s: s * y * ( + y * (y * (y * (13. - 5. * y) - 9.) - 1.) + 2.) / 24. + self._beta = lambda y, s: s * y * ( + y * (y * (y * (25. * y - 64.) + 39.) + 16.) - 16.) / 24. + self._gamma = lambda y, s: s * ( + y * y * (y * (y * (126. - 50. * y) - 70.) - 30.) / 24. + 1.) + self._delta = lambda y, s: s * y * ( + y * (y * (y * (50. * y - 124.) + 66.) + 16.) + 16.) / 24. + self._zeta = lambda y, s: s * y * ( + y * (y * (y * (61. - 25. * y) - 33.) - 1.) - 2.) / 24. + self._eta = lambda y, s: s * y * y * y * ( + y * (5. * y - 12.) + 7.) / 24. + + def __call__(self, ppos, pscal, d): + + """ + Remesh particles at position ppos with scalar + pscal along direction d. + + @param ppos : particle position + @param pscal : particle scalar + @param d : splitting direction + """ + size = ppos.shape + row, col = np.indices(size) + row_i = np.array(row) + floor = np.floor(ppos * size[d]) + y = np.array(ppos * size[d] - floor, dtype=PARMES_REAL, order=ORDER) + result = np.zeros(size, dtype=PARMES_REAL, order=ORDER) + ind = floor.astype(np.int32) % size[d] + # alpha + row = (ind - 2) % size[d] + for i in xrange(size[d]): + sl = self.slice_i_along_d(i, d) + result[row[sl], col[sl]] = result[row[sl], col[sl]] +\ + self._alpha(y[sl], pscal[sl]) + # beta + row = (ind - 1) % size[d] + for i in xrange(size[d]): + sl = self.slice_i_along_d(i, d) + result[row[sl], col[sl]] = result[row[sl], col[sl]] +\ + self._beta(y[sl], pscal[sl]) + # gamma + row = (ind) % size[d] + for i in xrange(size[d]): + sl = self.slice_i_along_d(i, d) + result[row[sl], col[sl]] = result[row[sl], col[sl]] +\ + self._gamma(y[sl], pscal[sl]) + # delta + row = (ind + 1) % size[d] + for i in xrange(size[d]): + sl = self.slice_i_along_d(i, d) + result[row[sl], col[sl]] = result[row[sl], col[sl]] +\ + self._delta(y[sl], pscal[sl]) + # zeta + row = (ind + 2) % size[d] + for i in xrange(size[d]): + sl = self.slice_i_along_d(i, d) + result[row[sl], col[sl]] = result[row[sl], col[sl]] +\ + self._zeta(y[sl], pscal[sl]) + # eta + row = (ind + 3) % size[d] + for i in xrange(size[d]): + sl = self.slice_i_along_d(i, d) + result[row[sl], col[sl]] = result[row[sl], col[sl]] +\ + self._eta(y[sl], pscal[sl]) + return result diff --git a/HySoP/hysop/numerics/remeshing/remeshing.py b/HySoP/hysop/numerics/remeshing/remeshing.py new file mode 100644 index 0000000000000000000000000000000000000000..66362b247a3def35f28c0d15e114d44f7c261023 --- /dev/null +++ b/HySoP/hysop/numerics/remeshing/remeshing.py @@ -0,0 +1,36 @@ +""" +@package parmepy.numerics.remeshing.remeshing +""" +from abc import ABCMeta, abstractmethod +from parmepy.constants import np, PARMES_REAL, ORDER +from parmepy.numerics.method import NumMethod + + +class Remeshing(NumMethod): + """Remshing with M6prime function""" + + __metaclass__ = ABCMeta + + @abstractmethod + def __init__(self, dim): + NumMethod.__init__(self) + self.name = 'M6Prime' + self._slice_all = [slice(None, None, None) + for d in xrange(dim)] + + def slice_i_along_d(self, i, d): + l = list(self._slice_all) + l[d] = i + return tuple(l) + + @abstractmethod + def __call__(self, ppos, pscal, d): + """ + Remesh particles at position ppos with scalar + pscal along direction d. + + @param ppos : particle position + @param pscal : particle scalar + @param d : splitting direction + """ + pass diff --git a/HySoP/hysop/numerics/splitting.py b/HySoP/hysop/numerics/splitting.py new file mode 100644 index 0000000000000000000000000000000000000000..2af8963c8dbf6b5680b5b345a3d35a570e55d212 --- /dev/null +++ b/HySoP/hysop/numerics/splitting.py @@ -0,0 +1,74 @@ +""" +@package parmepy.numerics.splitting + +Splitting operator representation +""" +from parmepy.numerics.method import NumMethod + + +class Splitting(NumMethod): + """ + Splitting operator representation. + + Operator of operators applying in a Strang splitting. + + Implements a 2nd order splitting in 3D: + @li X-dir, half time step + @li Y-dir, half time step + @li Z-dir, full time step + @li Y-dir, half time step + @li X-dir, half time step + \n + Implements a 2nd order splitting in 2D: + @li X-dir, half time step + @li Y-dir, full time step + @li X-dir, half time step + """ + + def __init__(self, function, dim, config='o2_lastFull'): + """ + Create a Splitting operator on a given list of operators and a dimension. + + @param operators : list of operators to split. + @param dim : problem dimension. + """ + self.name = "splitting" + ## Operators to split + self.function = function + ## Splitting at 2nd order. + self.splitting = [] + if config == 'o2_lastFull': + ## Half timestep in all directions but last + [self.splitting.append((i, 0.5)) for i in xrange(dim - 1)] + self.splitting.append((dim - 1, 1.)) + [self.splitting.append((dim - 2 - i, 0.5)) for i in xrange(dim - 1)] + else: + ## Half timestep in all directions + [self.splitting.append((i, 0.5)) for i in xrange(dim)] + [self.splitting.append((dim - 1 - i, 0.5)) for i in xrange(dim)] + + def __call__(self, t, dt): + """ + Apply Remeshing operator. + + @param t : current time. + @param dt : time step. + """ + for split_id, split in enumerate(self.splitting): + #print "direction : " + str(split[0]) +\ + # " dt*" + str(split[1]) + " id:" + str(split_id) + self.function(t, dt * split[1], split[0], split_id) + + def __str__(self): + s = "Splitting (DiscreteOperator). Splitting steps : \n" + for split in self.splitting: + s += "direction : " + str(split[0]) + " dt=" + str(split[1]) + "\n" + s += "Concerned operators : \n" + for op in self.operators: + s += str(op) + return s + +if __name__ == "__main__": + print __doc__ + print "- Provided class : Splitting" + print RemeshingDOp.__doc__ diff --git a/HySoP/hysop/obstacle/__init__.py b/HySoP/hysop/obstacle/__init__.py deleted file mode 100755 index b07d2f1c5eaad83311cc0dd55e921e78432f36de..0000000000000000000000000000000000000000 --- a/HySoP/hysop/obstacle/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -@package parmepy.obstacle - -Everything concerning obstacles. -""" diff --git a/HySoP/hysop/operator/advec_and_remesh_d.py b/HySoP/hysop/operator/advec_and_remesh_d.py index 38e927542794cf9c719ab805573db8a9b2964d78..1c254d071e302857d34b7c3e40f86adbbcffe7ac 100644 --- a/HySoP/hysop/operator/advec_and_remesh_d.py +++ b/HySoP/hysop/operator/advec_and_remesh_d.py @@ -73,7 +73,7 @@ class Advec_and_Remesh_d(DiscreteOperator): evt_init = self.init_copy.launch(self.scalar.gpu_data, self.particle_scalar.gpu_data) else: - if self.scalar.domain.dimension == 2: + if self.scalar.topology.dim == 2: evt_init = self.init_transpose.launch(self.scalar.gpu_data, self.particle_scalar.gpu_data) else: @@ -90,8 +90,8 @@ class Advec_and_Remesh_d(DiscreteOperator): self.particle_scalar.gpu_data, self.scalar.gpu_data, self.gpu_precision(dt), - self.gpu_precision(self.scalar.domain.origin[splittingDirection]), - self.gpu_precision(self.scalar.domain.step[splittingDirection])) + self.gpu_precision(self.scalar.topology.mesh.origin[splittingDirection]), + self.gpu_precision(self.scalar.topology.mesh.size[splittingDirection])) for df in self.output: df.contains_data = False # Get timpings from OpenCL events diff --git a/HySoP/hysop/operator/advec_scales.py b/HySoP/hysop/operator/advec_scales.py deleted file mode 100644 index 9521a4af62b44f39b3c94ad6e8ad61dc291c27f1..0000000000000000000000000000000000000000 --- a/HySoP/hysop/operator/advec_scales.py +++ /dev/null @@ -1,55 +0,0 @@ -# -*- coding: utf-8 -*- -""" -@package parmepy.operator Advec_scales -Continous Advection operator using Scales code (Jean-Baptiste) -""" -from continuous import ContinuousOperator -from advec_scales_d import Advec_scales_d - - -class Advec_scales(ContinuousOperator): - """ - Advection scales operator representation - """ - - def __init__(self, stab_coeff=None, velocity=None, advectedField=None, addField=None): - """ - Constructor. - Create an Advection operator using scales code (JB) from given velocity variables. - - @param velocity ContinuousVectorField : velocity variable. - @param advectedField ContinuousVector/ScalarField : variable to advect. - """ - ContinuousOperator.__init__(self) - self.stab_coeff = stab_coeff - self.velocity = velocity - self.advectedField = advectedField - self.addField = addField - self.addVariable([velocity, advectedField, addField]) - - def discretize(self, idVelocityD=0, idAdvectedFieldD=0, idAddFieldD=0, topology=None): - """ - Advection operator discretization method. - Create a discrete advection operator from given specifications. - - @param idVelocityD : Index of velocity discretisation to use for advection. - @param idAdvectedFieldD : Index of field discretisation to advect. - @param topology : topology of the input fields - """ - if self.discreteOperator is None: - self.discreteOperator = Advec_scales_d(self, idVelocityD, idAdvectedFieldD, idAddFieldD, self.stab_coeff, topology) - - def __str__(self): - """ToString method""" - s = " Advection scales operator (ContinuousOperator)" - if self.discreteOperator is not None: - s += "with the following discretization:\n" - s += str(self.discreteOperator) - else: - s += ". Not discretised" - return s + "\n" - -if __name__ == "__main__": - print __doc__ - print "- Provided class : Advec_scales" - print Advec_scales.__doc__ diff --git a/HySoP/hysop/operator/advection.py b/HySoP/hysop/operator/advection.py new file mode 100644 index 0000000000000000000000000000000000000000..56dfa4b7b47bf54ad88df87401a3b0cc171e56b7 --- /dev/null +++ b/HySoP/hysop/operator/advection.py @@ -0,0 +1,88 @@ +""" +@package parmepy.operator.advection + +Advection operator representation. +""" +from parmepy.operator.continuous import Operator +from parmepy.mpi.topology import Cartesian + + +class Advection(Operator): + """ + Advection operator representation. + """ + + def __init__(self, velocity, scalar): + """ + Create an Transport operator from given variables velocity and scalar. + + @param velocity : velocity variable. + @param scalar : scalar variable. + """ + Operator.__init__(self) + ## Transport velocity + self.velocity = velocity + ## Transported scalar + self.scalar = scalar + self.addVariable([velocity, scalar]) + + def setUp(self, resolutions=None, method='', + idVelocityD=0, idScalarD=0, **other_args): + """ + Transport operator discretization method. + Create an discrete Transport operator from given specifications. + + @param idVelocityD : Index of velocity discretisation to use. + @param idScalarD : Index of scalar discretisation to use. + @param method : the method to use. + """ + print resolutions + self.resolutions = resolutions + if resolutions is None: + raise ValueError("Advection setUp: no resolution given.") + #variables discretization + print "Variables discretization" + for v in self.variables: + topo = Cartesian(v.domain, v.domain.dimension, resolutions[v]) + v.discretize(topo) + print str(v) + print 'Initialization', + v.initialize() + if self.discreteOperator is None: + if method.find('gpu_2k') >= 0: + from parmepy.operator.gpu_particle_advection_2k \ + import GPUParticleAdvection + self.discreteOperator = \ + GPUParticleAdvection(self, idVelocityD, idScalarD) + elif method.find('gpu_1k') >= 0: + from parmepy.operator.gpu_particle_advection_1k \ + import GPUParticleAdvection + self.discreteOperator = \ + GPUParticleAdvection(self, idVelocityD, idScalarD) + elif method.find('scales') >= 0: + from parmepy.operator.scales_advection \ + import ScalesAdvection + ScalesAdvection(self, idVelocityD, idScalarD) + else: + print "Using default advection operator" + from parmepy.operator.particle_advection \ + import ParticleAdvection + self.discreteOperator = \ + ParticleAdvection(self, idVelocityD, idScalarD) + self.discreteOperator.setUp(method, + **other_args) + + def __str__(self): + """ToString method""" + s = "Advection operator (ContinuousOperator)" + if self.discreteOperator is not None: + s += " with the following discretization:\n" + s += str(self.discreteOperator) + else: + s += ". Not discretised" + return s + "\n" + +if __name__ == "__main__": + print __doc__ + print "- Provided class : Advection" + print Advection.__doc__ diff --git a/HySoP/hysop/operator/continuous.py b/HySoP/hysop/operator/continuous.py index 89a6871ceec661ea1ac3cda9c0a996bc67b12c76..88aaae842f8c69b974a1208acb839e0e6a204da3 100644 --- a/HySoP/hysop/operator/continuous.py +++ b/HySoP/hysop/operator/continuous.py @@ -6,7 +6,7 @@ Operator representation. from abc import ABCMeta, abstractmethod -class ContinuousOperator: +class Operator(object): """ Continuous operator abstract description. """ @@ -22,10 +22,8 @@ class ContinuousOperator: self.variables = [] ## Operator discretization. self.discreteOperator = None - ## Is need to split operator - self.needSplitting = False ## Timings informations - self.timings_info = ["", ""] + self.time_info = ["", ""] def addVariable(self, cVariable): """ @@ -46,33 +44,25 @@ class ContinuousOperator: """ return self.discreteOperator.apply(*args) - def setMethod(self, method): - """ - Sets method to use in apply method. - @param method : the method to use. - """ - if self.discreteOperator is not None: - self.discreteOperator.setMethod(method) - else: - raise ValueError("Cannot set mumerical method to non discretized operator") - def printComputeTime(self): """Displays compute time for operator.""" if self.discreteOperator is not None: self.discreteOperator.printComputeTime() - self.timings_info = self.discreteOperator.timings_info + self.time_info = self.discreteOperator.time_info else: - raise ValueError("Cannot print compute time of a non discretized operator") + raise ValueError("Cannot print compute time" + + "of a non discretized operator") @abstractmethod - def discretize(self, *spec): + def setUp(self, *spec): """ Abstract method. Must be implemented by sub-class. @param *spec : discretization specifications. """ - raise NotImplementedError("Need to override method in a subclass of ContinuousOperator") + raise NotImplementedError("Need to override method in " + + "a subclass of ContinuousOperator") if __name__ == "__main__": print __doc__ diff --git a/HySoP/hysop/operator/diffusion.py b/HySoP/hysop/operator/diffusion.py index dedaa0a55d865eb2423c0b4a110978bcb27e55a2..e87fd29126072635167a8189198833bc88224acb 100644 --- a/HySoP/hysop/operator/diffusion.py +++ b/HySoP/hysop/operator/diffusion.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- """ -@package parmepy.operator Diffusion +@package parmepy.operator.diffusion Continous Diffusion operator (F2PY) """ -from continuous import ContinuousOperator -from diffusion_d import Diffusion_d +from parmepy.operator.continuous import Operator +from diffusion_fft import DiffusionFFT -class Diffusion(ContinuousOperator): +class Diffusion(Operator): """ Diffusion operator representation using FFT """ @@ -21,13 +21,14 @@ class Diffusion(ContinuousOperator): @param vorticity ContinuousVectorField : vorticity variable. @param viscosity : viscosity of the considered medium. """ - ContinuousOperator.__init__(self) + Operator.__init__(self) self.velocity = velocity self.vorticity = vorticity self.viscosity = viscosity self.addVariable([velocity, vorticity]) - def discretize(self, idVelocityD=0, idVorticityD=0, topology=None): + def setUp(self, resolutions=None, method='', + idVelocityD=0, idVorticityD=0): """ Diffusion operator discretization method. Create a discrete Diffusion operator from given specifications. @@ -35,8 +36,12 @@ class Diffusion(ContinuousOperator): @param idVelocityD : Index of velocity discretisation to use. @param idVelocityD : Index of vorticity discretisation to update. """ - if self.discreteOperator is None: - self.discreteOperator = Diffusion_d(self, idVelocityD, idVorticityD, self.viscosity, topology) + ## Create topology here for FFT + topology = None + self.discreteOperator = DiffusionFFT(self, + idVelocityD, idVorticityD, + self.viscosity) + self.discreteOperator(topology) def __str__(self): """ToString method""" diff --git a/HySoP/hysop/operator/diffusion_d.py b/HySoP/hysop/operator/diffusion_fft.py similarity index 93% rename from HySoP/hysop/operator/diffusion_d.py rename to HySoP/hysop/operator/diffusion_fft.py index f3af5ed3cfe8e0833aad2bfa9e9f314f8496f2b1..6c48eaec6e27d78e36899247839fa04be0031625 100644 --- a/HySoP/hysop/operator/diffusion_d.py +++ b/HySoP/hysop/operator/diffusion_fft.py @@ -4,19 +4,18 @@ Discrete Diffusion operator using FFT """ from parmepy.f2py import fftw2py -from discrete import DiscreteOperator -from parmepy.constants import ORDER, PARMES_REAL -import numpy as np +from parmepy.operator.discrete import DiscreteOperator +from parmepy.constants import np, ORDER, PARMES_REAL import time -class Diffusion_d(DiscreteOperator): +class DiffusionFFT(DiscreteOperator): """ Operator representation. """ def __init__(self, diff, idVelocityD=0, idVorticityD=0, - viscosity=0.1, topology=None): + viscosity=0.1): """ Constructor. @param velocity : descretized velocity to use. @@ -27,13 +26,15 @@ class Diffusion_d(DiscreteOperator): self.velocity = diff.velocity.discreteField[idVelocityD] self.vorticity = diff.vorticity.discreteField[idVorticityD] self.viscosity = viscosity + self.compute_time = 0. + + def setUp(self, method='', topology=None): self.topology = topology self.ghosts = topology.ghosts self.resolution = topology.mesh.resolution self.dim = topology.dim - self.compute_time = 0. - def apply(self, t, dt): + def apply(self, t, dt, ite): """ Apply operator. """ diff --git a/HySoP/hysop/operator/discrete.py b/HySoP/hysop/operator/discrete.py index 3160a4a27b69c415042eff8e5942d468b480be41..26e94fe1214938ba8736d64d924e5c682ae68277 100644 --- a/HySoP/hysop/operator/discrete.py +++ b/HySoP/hysop/operator/discrete.py @@ -6,7 +6,7 @@ Discrete operator representation. from abc import ABCMeta, abstractmethod -class DiscreteOperator: +class DiscreteOperator(object): """ Abstract description of a discretized operator. """ @@ -14,57 +14,39 @@ class DiscreteOperator: __metaclass__ = ABCMeta @abstractmethod - def __init__(self): + def __init__(self, numMethod=None): """ Create an empty discrete operator. """ ## Input variables - self.input = None + self.input = [] ## Output variables - self.output = None + self.output = [] ## variables self.variables = [] ## Operator numerical method. - self.numMethod = None - ## DiscreteOperator is a discrete operator - self.discreteOperator = self + self.numMethod = numMethod ## Total compute time self.total_time = 0. ## Operator name self.name = "?" ## Timings informations - self.timings_info = ["", ""] - - def setMethod(self, method): - """ - Set the numerical method used by operator when calling applyOperator. - - @param method : Numerical method. - """ - self.numMethod = method - - def addVariable(self, cVariable): - """ - Add an continuous variables to the operator. - - @param cVariable : list of variables to add. - """ - for v in cVariable: - if self.variables.count(v) == 0: - self.variables.append(v) + self.time_info = ["", ""] @abstractmethod def printComputeTime(self): """Print total computing time.""" - raise NotImplementedError("Need to override method in a subclass of DiscreteOperator") + raise NotImplementedError("Need to override method in " + + "a subclass of DiscreteOperator") @abstractmethod - def apply(self): + def apply(self, t, dt, ite): """ Abstract method, apply operaton on a variable. Must be implemented by sub-class. """ - raise NotImplementedError("Need to override method in a subclass of DiscreteOperator") + raise NotImplementedError("Need to override method in " + + "a subclass of DiscreteOperator") def __str__(self): """ToString method""" diff --git a/HySoP/hysop/tools/energy_enstrophy.py b/HySoP/hysop/operator/energy_enstrophy.py similarity index 90% rename from HySoP/hysop/tools/energy_enstrophy.py rename to HySoP/hysop/operator/energy_enstrophy.py index a165cd64c4cd691a141287076c3503925c404500..b80249d47bddbc20b3eb93fdf7c105cf14c55129 100644 --- a/HySoP/hysop/tools/energy_enstrophy.py +++ b/HySoP/hysop/operator/energy_enstrophy.py @@ -3,13 +3,13 @@ @package physics Compute Energy and Enstrophy """ -import numpy as np +from parmepy.constants import np import mpi4py.MPI as MPI -from parmepy.tools.diagnostics import Diagnostics +from parmepy.operator.monitors.monitoring import Monitoring import time -class Energy_enstrophy(Diagnostics): +class Energy_enstrophy(Monitoring): """ Compute the forces according the Noca s formula """ @@ -24,11 +24,10 @@ class Energy_enstrophy(Diagnostics): @param frequency : output file producing frequency. @param outputPrefix : file name prefix, contains relative path. """ - Diagnostics.__init__(self, frequency, outputPrefix) + Monitoring.__init__(self, frequency) self.topology = topology self.velocity = velocity self.vorticity = vorticity - self.frequency = frequency self.outputPrefix = outputPrefix self.dim = topology.dim self.resolution = topology.mesh.resolution @@ -38,9 +37,9 @@ class Energy_enstrophy(Diagnostics): if (self.topology.rank == 0): self.f = open(self.outputPrefix, 'w') - def process(self, ite, t, dt): + def __call__(self, t, dt, ite): """ - Computation of kinetic energy, enstrophy & + Computation of kinetic energy, enstrophy & Checking energy and enstrophy decay (--> is(recoveredViscosity == viscosity) ?) """ velo = self.velocity.discreteField[0] diff --git a/HySoP/hysop/operator/gpu_particle_advection_1k.py b/HySoP/hysop/operator/gpu_particle_advection_1k.py new file mode 100644 index 0000000000000000000000000000000000000000..02fa85c3bdeead6228091cf4072a7b04521ef0c0 --- /dev/null +++ b/HySoP/hysop/operator/gpu_particle_advection_1k.py @@ -0,0 +1,145 @@ +""" +@package parmepy.operator.gpu_particle_advection + +Discrete advection representation +""" +from parmepy.constants import np +from parmepy.operator.particle_advection import ParticleAdvection +from parmepy.fields.continuous import Field +from parmepy.fields.gpu_discrete import GPUVectorField +from parmepy.fields.gpu_discrete import GPUScalarField +from parmepy.gpu import PARMES_REAL_GPU, PARMES_DOUBLE_GPU, \ + PARMES_INTEGER_GPU, GPU_SRC, cl, \ + _check_double_precision_capability +from parmepy.numerics.gpu_kernel import KernelLauncher +from parmepy.numerics.gpu_kernel import KernelListLauncher +from parmepy.numerics.splitting import Splitting + + +class GPUParticleAdvection1k(ParticleAdvection): + """ + Particle advection operator representation on GPU. + + """ + + def __init__(self, advec, idVelocityD=0, idScalarD=0, + platform_id=0, device_id=0, + device_type='gpu'): + """ + Create a Advection operator. + Work on a given scalar at a given velocity to produce scalar + distribution at new positions. + + @param advec : Transport operator + @param idVelocityD : Index of velocity discretisation to use. + @param idScalarD : Index of scalar discretisation to use. + @param method : the method to use. + """ + raise ValueError("Not implemented") + + def setUp(self, method='', src=None, precision=PARMES_REAL_GPU): + pass + + def _apply_in_dir(self, t, dt, d, split_id): + """ + Apply advection operator. + + @param t : current time. + @param dt : time step. + @param d : Direction of splitting. + + Advection algorithm: + @li 1. Particle initialization : \n + - by copy scalar from grid to particles if previous splitting + direction equals current splitting direction.\n + - by transposition of scalar from grid to particle. + @li 2. Particle advection :\n + - compute particle position in splitting direction as a + scalar. Performs a RK2 resolution of dx_p/dt = a_p. + @li 3. Profile timings of OpenCL kernels. + """ + # Particle init + if split_id == 0: + evt_init = self.init_copy.launch(self.scalar.gpu_data, + self.part_scalar.gpu_data) + else: + if self.scalar.topology.dim == 2: + evt_init = \ + self.init_transpose.launch(self.scalar.gpu_data, + self.part_scalar.gpu_data) + else: + if min(split_id, d) == 0: + evt_init = \ + self.init_transpose.launch(0, self.scalar.gpu_data, + self.part_scalar.gpu_data) + else: + evt_init = \ + self.init_transpose.launch(1, self.gpu_scalar.gpu_data, + self.part_scalar.gpu_data) + # Advection + evt_advec = self.num_advec.launch(self.velocity.gpu_data[d], + self.part_position.gpu_data, + self.gpu_precision(dt), + self.coord_min[d], + self.mesh_size[d]) + self.num_advec.finish() + # remeshing + evt_remesh = self.num_remesh.launch(self.part_position.gpu_data, + self.part_scalar.gpu_data, + self.scalar.gpu_data, + self.coord_min[d], + self.mesh_size[d]) + for df in self.output: + df.data_on_device = True + df.contains_data = False + # Get timpings from OpenCL events + self.queue.finish() + c_time_init = (evt_init.profile.end - evt_init.profile.start) * 1e-9 + c_time_advec = (evt_advec.profile.end - evt_advec.profile.start) * 1e-9 + c_time_remesh = (evt_remesh.profile.end - + evt_remesh.profile.start) * 1e-9 + self.compute_time[d] += c_time_advec + self.compute_time_remesh[d] += c_time_remesh + if split_id == 0: + self.compute_time_copy[d] += c_time_init + else: + self.compute_time_swap[min(split_id, d)] += c_time_init + self.total_time += (c_time_advec + c_time_remesh + c_time_init) + return (c_time_advec + c_time_remesh + c_time_init) + + def apply(self, t, dt, ite): + self.numMethod(t, dt) + + def printComputeTime(self): + self.time_info[0] = "\"Advection total\" " + self.time_info[0] += "\"copy\" \"swap xy\" \"swap xz\" " + self.time_info[0] += "\"Advection x\" \"Advection y\" \"Advection z\" " + self.time_info[0] += "\"Remeshing x\" \"Remeshing y\" \"Remeshing z\" " + self.time_info[1] = str(self.total_time) + " " + self.time_info[1] += str(self.compute_time_copy[0]) + " " + self.time_info[1] += str(self.compute_time_swap[0]) + " " + self.time_info[1] += str(self.compute_time_swap[1]) + " " + self.time_info[1] += str(self.compute_time[0]) + " " + self.time_info[1] += str(self.compute_time[1]) + " " + self.time_info[1] += str(self.compute_time[2]) + " " + self.time_info[1] += str(self.compute_time_remesh[0]) + " " + self.time_info[1] += str(self.compute_time_remesh[1]) + " " + self.time_info[1] += str(self.compute_time_remesh[2]) + " " + print "Advection total time : ", self.total_time, self.call_number + print "\t Advection init copy :", self.compute_time_copy, + print self.init_copy.call_number + print "\t Advection init swap :", self.compute_time_swap, + print self.init_transpose.call_number + print "\t Advection :", self.compute_time, + print self.num_advec.call_number + print "\t Remeshing :", self.compute_time_remesh, + print self.num_remesh.call_number + + def __str__(self): + s = "Advection_P (DiscreteOperator). " + DiscreteOperator.__str__(self) + return s + +if __name__ == "__main__": + print __doc__ + print "- Provided class : Transport_d" + print AdvectionDOp.__doc__ diff --git a/HySoP/hysop/operator/gpu_particle_advection_2k.py b/HySoP/hysop/operator/gpu_particle_advection_2k.py new file mode 100644 index 0000000000000000000000000000000000000000..564cef2b9584799848cd17702a24bd6015e0f2b3 --- /dev/null +++ b/HySoP/hysop/operator/gpu_particle_advection_2k.py @@ -0,0 +1,488 @@ +""" +@package parmepy.operator.gpu_particle_advection + +Discrete advection representation +""" +from parmepy.constants import np +from parmepy.operator.particle_advection import ParticleAdvection +from parmepy.fields.continuous import Field +from parmepy.fields.gpu_discrete import GPUVectorField +from parmepy.fields.gpu_discrete import GPUScalarField +from parmepy.gpu import PARMES_REAL_GPU, PARMES_DOUBLE_GPU, \ + PARMES_INTEGER_GPU, GPU_SRC, cl, \ + _check_double_precision_capability +from parmepy.numerics.gpu_kernel import KernelLauncher +from parmepy.numerics.gpu_kernel import KernelListLauncher +from parmepy.numerics.splitting import Splitting + + +class GPUParticleAdvection(ParticleAdvection): + """ + Particle advection operator representation on GPU. + + """ + + def __init__(self, advec, idVelocityD=0, idScalarD=0, + platform_id=0, device_id=0, + device_type='gpu'): + """ + Create a Advection operator. + Work on a given scalar at a given velocity to produce scalar + distribution at new positions. + + @param advec : Transport operator + @param idVelocityD : Index of velocity discretisation to use. + @param idScalarD : Index of scalar discretisation to use. + @param method : the method to use. + """ + ParticleAdvection.__init__(self, advec, idVelocityD, idScalarD) + self.name = "advection_gpu" + self.num_method = None + self.resolution = self.scalar.topology.mesh.resolution + self.compute_time_remesh = [0., 0., 0.] + print "=== OpenCL environment ===" + #Get platform. + try: + ## OpenCL platform + self.platform = cl.get_platforms()[platform_id] + except IndexError: + print " Incorrect platform_id :", platform_id, ".", + print " Only ", len(cl.get_platforms()), " available.", + print " Getting defalut platform. " + self.platform = cl.get_platforms()[0] + print " Platform " + print " - Name :", self.platform.name + print " - Version :", self.platform.version + #Get device. + try: + ## OpenCL device + self.device = self.platform.get_devices( + eval("cl.device_type." + device_type.upper()))[device_id] + except cl.RuntimeError as e: + print "RuntimeError:", e + self.device = cl.create_some_context().devices[0] + except AttributeError as e: + print "AttributeError:", e + self.device = cl.create_some_context().devices[0] + print " Device" + print " - Name :", + print self.device.name + print " - Type :", + print cl.device_type.to_string(self.device.type) + print " - C Version :", + print self.device.opencl_c_version + print " - Global mem size :", + print self.device.global_mem_size / (1024 ** 3), "GB" + print "===\n" + #Creates GPU Context + ## OpenCL context + self.ctx = cl.Context([self.device]) + #Create CommandQueue on the GPU Context + ## OpenCL command queue + self.queue = cl.CommandQueue( + self.ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) + + def setUp(self, method='', src=None, precision=PARMES_REAL_GPU): + ## Result position + self.method = method + self.user_gpu_src = src + self.gpu_precision = precision + ## Optimal work item number + if len(self.resolution) == 3: + self.workItemNumber = 64 if min(self.resolution) >= 64 \ + else min(self.resolution) + else: + self.workItemNumber = 256 if min(self.resolution) >= 256 \ + else min(self.resolution) + print " Work-Item number: ", self.workItemNumber + dim = self.scalar.topology.dim + self.coord_min = np.ones(4, dtype=self.gpu_precision) + self.mesh_size = np.ones(4, dtype=self.gpu_precision) + self.coord_min[0:dim] = np.asarray(self.scalar.topology.mesh.origin, + dtype=self.gpu_precision) + self.mesh_size[0:dim] = np.asarray(self.scalar.topology.mesh.step, + dtype=self.gpu_precision) + if len(self.resolution) == 3: + self.gwi = (int(self.workItemNumber), + int(self.resolution[1]), (self.resolution[2])) + self.lwi = (int(self.workItemNumber), 1, 1) + else: + self.gwi = (int(self.workItemNumber), + int(self.resolution[1])) + self.lwi = (int(self.workItemNumber), 1) + print "=== Kernel sources ===" + self.build_options = "" + if _check_double_precision_capability(self.device, + self.gpu_precision): + self.build_options += " -cl-fp32-correctly-rounded-divide-sqrt " + if self.gpu_precision is PARMES_REAL_GPU: + self.build_options += " -cl-single-precision-constant" + self.build_options += " -D WIDTH=" + str(self.resolution[0]) + self.build_options += " -D WGN=" + str(self.workItemNumber) + self.gpu_src = self._collect_usr_cl_src(src) + self.gpu_src += self._collect_kernels_cl_src() + print "===\n" + print "=== Kernel sources compiling ===" + ## Build code + if self.gpu_precision is PARMES_REAL_GPU: + prg = cl.Program(self.ctx, self.gpu_src.replace('.0', '.0f')) + else: + prg = cl.Program(self.ctx, self.gpu_src.replace('float', 'double')) + ## OpenCL program + self.prg = prg.build(self.build_options) + print "Compiler options : ", + print self.prg.get_build_info( + self.device, cl.program_build_info.OPTIONS) + print "Compiler status : ", + print self.prg.get_build_info( + self.device, cl.program_build_info.STATUS) + print "Compiler log : ", + print self.prg.get_build_info(self.device, cl.program_build_info.LOG) + print "===\n" + print "=== OpenCL Buffer allocations ===" + self._buffer_allocations() + print "===\n" + print "=== OpenCL Buffer initialisation ===" + self._buffer_initialisations() + print "===\n" + print "=== OpenCL Kernels affectation ===" + self.num_advec = KernelLauncher(self.prg.advection, + self.queue, + self.gwi, + self.lwi) + self.num_remesh = KernelLauncher(self.prg.remeshing, + self.queue, + self.gwi, + self.lwi) + self.init_copy = KernelLauncher(self.prg.copy, + self.queue, + self.copy_gwi, + self.copy_lwi) + if len(self.resolution) == 3: + self.init_transpose = \ + KernelListLauncher([self.prg.transpose_xy, + self.prg.transpose_xz], + self.queue, + [self.transpose_xy_gwi, + self.transpose_xz_gwi], + [self.transpose_xy_lwi, + self.transpose_xz_lwi]) + else: + self.init_transpose = \ + KernelLauncher(self.prg.transpose_xy, + self.queue, + self.transpose_xy_gwi, + self.transpose_xy_lwi) + print "===\n" + self.numMethod = Splitting(self._apply_in_dir, self.scalar.dimension) + print "Method used:", self.name + + def _buffer_initialisations(self): + data, transfer_time, compute_time = 0, 0., 0. + for gpudf in self.variables: + match = 'init' + gpudf.name + initKernel = None + # Looking for initKernel + for k in self.prg.all_kernels(): + k_name = k.get_info(cl.kernel_info.FUNCTION_NAME) + if match.find(k_name) >= 0: + initKernel = cl.Kernel(self.prg, k_name) + if initKernel is not None: + kl = KernelLauncher(initKernel, self.queue, + self.gwi, self.lwi) + if gpudf.vector: + args = [gpudf.gpu_data[d] + for d in xrange(len(self.resolution))] + args.append(self.coord_min) + args.append(self.mesh_size) + evt = kl.launch(*tuple(args)) + else: + args = [gpudf.gpu_data] + args.append(self.coord_min) + args.append(self.mesh_size) + evt = kl.launch(*tuple(args)) + self.queue.finish() + temp_time = (evt.profile.end - evt.profile.start) * 1e-9 + print "Done in ", temp_time, "sec" + compute_time += temp_time + gpudf.contains_data = False + gpudf.data_on_device = True + else: + if gpudf.contains_data: + print gpudf.name, ":", + temp_data, temp_time = gpudf.toDevice() + data += temp_data + transfer_time += temp_time + if data > 0: + print "Total Transfers : ", data, "Bytes transfered at ", + print "{0:.3f} GBytes/sec".format((data * 1e-9) / transfer_time) + if compute_time > 0.: + print "Total Computing : ", compute_time, "sec" + + def _buffer_allocations(self): + ## Velocity. + GPUVectorField.fromVectorField( + self.queue, self.advec.velocity.discreteField[self.idVelocityD], + self.gpu_precision) + self.velocity = self.advec.velocity.discreteField[self.idVelocityD] + ## Transported scalar. + GPUScalarField.fromScalarField( + self.queue, self.advec.scalar.discreteField[self.idScalarD], + self.gpu_precision) + self.scalar = self.advec.scalar.discreteField[self.idScalarD] + ## Particle position + particle_position = Field(self.scalar.topology.domain, + "Particle_Position", + vector=False) + particle_position.discretize(self.scalar.topology) + GPUScalarField.fromScalarField( + self.queue, particle_position.discreteField[0], + self.gpu_precision) + self.part_position = particle_position.discreteField[0] + ## Result scalar + particle_scalar = Field(self.scalar.topology.domain, + "Particle_Scalar", + vector=False) + particle_scalar.discretize(self.scalar.topology) + GPUScalarField.fromScalarField( + self.queue, particle_scalar.discreteField[0], + self.gpu_precision) + self.part_scalar = particle_scalar.discreteField[0] + self.particle_variables = [self.part_position, self.part_scalar] + self.variables = [self.scalar, self.velocity, + self.part_position, self.part_scalar] + total_mem_used = 0 + for gpuv in self.variables: + total_mem_used += gpuv.mem_size + print "Total Global Memory used : ", total_mem_used, + print "Bytes (", total_mem_used / (1024 ** 2), "MB)", + print "({0:.3f} %)".format( + 100 * total_mem_used / (self.device.global_mem_size * 1.)) + + def _collect_kernels_cl_src(self): + gpu_src = "" + if len(self.resolution) == 3: + if self.gpu_precision is PARMES_REAL_GPU: + src_advec = 'advection_3D_opt4_builtin.cl' + else: + src_advec = 'advection_3D_opt2_builtin.cl' + else: + if self.gpu_precision is PARMES_REAL_GPU: + src_advec = 'advection_2D_opt4_builtin.cl' + else: + src_advec = 'advection_2D_builtin.cl' + f = open(GPU_SRC + src_advec) + print " - advection:", f.name + gpu_src += "".join(f.readlines()) + f.close() + ## remeshing + if len(self.resolution) == 3: + if self.method.find('m6prime'): + src_remesh = 'remeshing_3D_opt4_private4.cl' + src_remesh_weights = 'weights_m6prime_opt4_builtin.cl' + else: + src_remesh = 'remeshing_m8prime_3D_opt4_private4.cl' + src_remesh_weights = 'weights_m8prime_opt4_builtin.cl' + else: + if self.method.find('m6prime'): + src_remesh = 'remeshing_2D_opt4_noBC.cl' + src_remesh_weights = 'weights_m6prime_builtin.cl' + else: + src_remesh = 'remeshing_m8prime_2D_opt4_noBC.cl' + src_remesh_weights = 'weights_m8prime_builtin.cl' + f = open(GPU_SRC + src_remesh_weights) + gpu_src += "".join(f.readlines()) + print " - remeshing weights:", f.name + f.close() + f = open(GPU_SRC + src_remesh) + print " - remeshing:", f.name + gpu_src += "".join(f.readlines()) + f.close() + ## copy + if len(self.resolution) == 3: + if self.gpu_precision is PARMES_REAL_GPU: + src_copy = 'copy_3D_opt4.cl' + self.build_options += " -D TILE_DIM_COPY=16 -D BLOCK_ROWS_COPY=8" + self.copy_gwi = (int(self.resolution[0] / 4), int(self.resolution[1] / 2), int(self.resolution[2])) + self.copy_lwi = (4, 8, 1) + else: + src_copy = 'copy_3D_locMem.cl' + self.build_options += " -D TILE_DIM_COPY=32 -D BLOCK_ROWS_COPY=8" + self.copy_gwi = (int(self.resolution[0]), int(self.resolution[1] / 4), int(self.resolution[2])) + self.copy_lwi = (32, 8, 1) + else: + if self.gpu_precision is PARMES_REAL_GPU: + src_copy = 'copy_2D_opt2.cl' + self.build_options += " -D TILE_DIM_COPY=16 -D BLOCK_ROWS_COPY=8" + self.copy_gwi = (int(self.resolution[0] / 2), int(self.resolution[1] / 2)) + self.copy_lwi = (8, 8) + else: + src_copy = 'copy_2D_opt2.cl' + self.build_options += " -D TILE_DIM_COPY=32 -D BLOCK_ROWS_COPY=2" + self.copy_gwi = (int(self.resolution[0] / 2), int(self.resolution[1] / 16)) + self.copy_lwi = (16, 2) + f = open(GPU_SRC + src_copy) + print " - copy:", f.name + gpu_src += "".join(f.readlines()) + f.close() + ## transpose + if len(self.resolution) == 3: + if self.gpu_precision is PARMES_REAL_GPU: + src_transpose_xy = 'transpose_3D_xy_coalesced_locPad_Diag_2Dslice_opt2.cl' + self.build_options += " -D TILE_DIM_XY=16 -D BLOCK_ROWS_XY=8" + self.transpose_xy_gwi = (int(self.resolution[0] / 2), int(self.resolution[1] / 2), int(self.resolution[2])) + self.transpose_xy_lwi = (8, 8, 1) + src_transpose_xz = 'transpose_3D_xz_coalesced_locPad_Diag_bis_3DBlock.cl' + self.build_options += " -D TILE_DIM_XZ=16 -D BLOCK_ROWS_XZ=4" + self.transpose_xz_gwi = (int(self.resolution[0]), int(self.resolution[1] / 4), int(self.resolution[2] / 4)) + self.transpose_xz_lwi = (16, 4, 4) + else: + src_transpose_xy = 'transpose_3D_xy_coalesced_locPad_Diag_2Dslice_opt4.cl' + self.build_options += " -D TILE_DIM_XY=32 -D BLOCK_ROWS_XY=4" + self.transpose_xy_gwi = (int(self.resolution[0] / 4), int(self.resolution[1] / 8), int(self.resolution[2])) + self.transpose_xy_lwi = (8, 4, 1) + src_transpose_xz = 'transpose_3D_xz_coalesced_Diag_bis_3DBlock.cl' + self.build_options += " -D TILE_DIM_XZ=8 -D BLOCK_ROWS_XZ=2" + self.transpose_xz_gwi = (int(self.resolution[0]), int(self.resolution[1] / 4), int(self.resolution[2] / 4)) + self.transpose_xz_lwi = (8, 2, 2) + else: + if self.gpu_precision is PARMES_REAL_GPU: + src_transpose_xy = 'transpose_2D_xy_coalesced_locPad_Diag_opt4.cl' + self.build_options += " -D TILE_DIM_XY=32 -D BLOCK_ROWS_XY=8" + self.transpose_xy_gwi = (int(self.resolution[0] / 4), int(self.resolution[1]) / 4) + self.transpose_xy_lwi = (8, 8) + else: + src_transpose_xy = 'transpose_2D_xy_coalesced_locPad_Diag_opt4.cl' + self.build_options += " -D TILE_DIM_XY=32 -D BLOCK_ROWS_XY=2" + self.transpose_xy_gwi = (int(self.resolution[0] / 4), int(self.resolution[1]) / 16) + self.transpose_xy_lwi = (8, 2) + f = open(GPU_SRC + src_transpose_xy) + print " - transpose_xy:", f.name + gpu_src += "".join(f.readlines()) + f.close() + if len(self.resolution) == 3: + f = open(GPU_SRC + src_transpose_xz) + gpu_src += "".join(f.readlines()) + f.close() + return gpu_src + + def _collect_usr_cl_src(self, usr_src): + print "usr src", usr_src + gpu_src = "" + if usr_src is not None: + if isinstance(usr_src, list): + for src in usr_src: + print "Sources files (user): ", src + f = open(src, 'r') + gpu_src += "".join(f.readlines()) + f.close() + else: + print "Sources files (user): ", usr_src + f = open(usr_src, 'r') + gpu_src += "".join(f.readlines()) + f.close() + return gpu_src + + def _apply_in_dir(self, t, dt, d, split_id): + """ + Apply advection operator. + + @param t : current time. + @param dt : time step. + @param d : Direction of splitting. + + Advection algorithm: + @li 1. Particle initialization : \n + - by copy scalar from grid to particles if previous splitting + direction equals current splitting direction.\n + - by transposition of scalar from grid to particle. + @li 2. Particle advection :\n + - compute particle position in splitting direction as a + scalar. Performs a RK2 resolution of dx_p/dt = a_p. + @li 3. Profile timings of OpenCL kernels. + """ + # Particle init + if split_id == 0: + evt_init = self.init_copy.launch(self.scalar.gpu_data, + self.part_scalar.gpu_data) + else: + if self.scalar.topology.dim == 2: + evt_init = \ + self.init_transpose.launch(self.scalar.gpu_data, + self.part_scalar.gpu_data) + else: + if min(split_id, d) == 0: + evt_init = \ + self.init_transpose.launch(0, self.scalar.gpu_data, + self.part_scalar.gpu_data) + else: + evt_init = \ + self.init_transpose.launch(1, self.gpu_scalar.gpu_data, + self.part_scalar.gpu_data) + # Advection + evt_advec = self.num_advec.launch(self.velocity.gpu_data[d], + self.part_position.gpu_data, + self.gpu_precision(dt), + self.coord_min[d], + self.mesh_size[d]) + self.num_advec.finish() + # remeshing + evt_remesh = self.num_remesh.launch(self.part_position.gpu_data, + self.part_scalar.gpu_data, + self.scalar.gpu_data, + self.coord_min[d], + self.mesh_size[d]) + for df in self.output: + df.data_on_device = True + df.contains_data = False + # Get timpings from OpenCL events + self.queue.finish() + c_time_init = (evt_init.profile.end - evt_init.profile.start) * 1e-9 + c_time_advec = (evt_advec.profile.end - evt_advec.profile.start) * 1e-9 + c_time_remesh = (evt_remesh.profile.end - + evt_remesh.profile.start) * 1e-9 + self.compute_time[d] += c_time_advec + self.compute_time_remesh[d] += c_time_remesh + if split_id == 0: + self.compute_time_copy[d] += c_time_init + else: + self.compute_time_swap[min(split_id, d)] += c_time_init + self.total_time += (c_time_advec + c_time_remesh + c_time_init) + return (c_time_advec + c_time_remesh + c_time_init) + + def apply(self, t, dt, ite): + self.numMethod(t, dt) + + def printComputeTime(self): + self.time_info[0] = "\"Advection total\" " + self.time_info[0] += "\"copy\" \"swap xy\" \"swap xz\" " + self.time_info[0] += "\"Advection x\" \"Advection y\" \"Advection z\" " + self.time_info[0] += "\"Remeshing x\" \"Remeshing y\" \"Remeshing z\" " + self.time_info[1] = str(self.total_time) + " " + self.time_info[1] += str(self.compute_time_copy[0]) + " " + self.time_info[1] += str(self.compute_time_swap[0]) + " " + self.time_info[1] += str(self.compute_time_swap[1]) + " " + self.time_info[1] += str(self.compute_time[0]) + " " + self.time_info[1] += str(self.compute_time[1]) + " " + self.time_info[1] += str(self.compute_time[2]) + " " + self.time_info[1] += str(self.compute_time_remesh[0]) + " " + self.time_info[1] += str(self.compute_time_remesh[1]) + " " + self.time_info[1] += str(self.compute_time_remesh[2]) + " " + print "Advection total time : ", self.total_time, self.call_number + print "\t Advection init copy :", self.compute_time_copy, + print self.init_copy.call_number + print "\t Advection init swap :", self.compute_time_swap, + print self.init_transpose.call_number + print "\t Advection :", self.compute_time, + print self.num_advec.call_number + print "\t Remeshing :", self.compute_time_remesh, + print self.num_remesh.call_number + + def __str__(self): + s = "Advection_P (DiscreteOperator). " + DiscreteOperator.__str__(self) + return s + +if __name__ == "__main__": + print __doc__ + print "- Provided class : Transport_d" + print AdvectionDOp.__doc__ diff --git a/HySoP/hysop/operator/monitors/__init__.py b/HySoP/hysop/operator/monitors/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..430a206ea3619dd4a0923cf131408ed6e1825a44 --- /dev/null +++ b/HySoP/hysop/operator/monitors/__init__.py @@ -0,0 +1,5 @@ +""" +@package parmepy.operator + +Everything concerning operators. +""" diff --git a/HySoP/hysop/tools/compute_forces.py b/HySoP/hysop/operator/monitors/compute_forces.py similarity index 95% rename from HySoP/hysop/tools/compute_forces.py rename to HySoP/hysop/operator/monitors/compute_forces.py index 0dc4efeb18aa95476db2456ceb4349452e4979c7..53c85bb73ba60a7316baa1843a00204c555da3af 100644 --- a/HySoP/hysop/tools/compute_forces.py +++ b/HySoP/hysop/operator/monitors/compute_forces.py @@ -3,16 +3,15 @@ @package physics Compute forces """ -from parmepy.constants import PARMES_REAL, PARMES_INTEGER, PI -import numpy as np +from parmepy.constants import np, PARMES_REAL, PARMES_INTEGER, PI from parmepy.operator.fct2op import Fct2Op from parmepy.operator.differentialOperator_d import DifferentialOperator_d -from parmepy.tools.diagnostics import Diagnostics +from parmepy.operator.monitors.monitoring import Monitoring from parmepy.mpi import main_comm, MPI import time -class Compute_forces(Diagnostics): +class Compute_forces(Monitoring): """ Compute the forces according the Noca s formula """ @@ -22,16 +21,16 @@ class Compute_forces(Diagnostics): frequency=None, outputPrefix=None): """ Constructor. - @param velocity : Continuous velocity field - @param vorticity : Continuous vorticity field + @param velocity : Continuous velocity field + @param vorticity : Continuous vorticity field @param topology : Local topology - @param obstacle : Continuous obstacle - @param boxMin : Global minimum coordinates of the control volume - @param boxMax : Global maximum coordinates of the control volume + @param obstacle : Continuous obstacle + @param boxMin : Global minimum coordinates of the control volume + @param boxMax : Global maximum coordinates of the control volume @param frequency : output file producing frequency. @param outputPrefix : file name prefix, contains relative path. """ - Diagnostics.__init__(self, frequency, outputPrefix) + Monitoring.__init__(self, frequency) self.velocity = velocity self.vorticity = vorticity self.topology = topology @@ -39,7 +38,6 @@ class Compute_forces(Diagnostics): self.boxMin = boxMin self.boxMax = boxMax self.Re = Reynolds - self.frequency = frequency self.outputPrefix = outputPrefix self.dim = topology.dim self.res = topology.mesh.resolution @@ -78,18 +76,18 @@ class Compute_forces(Diagnostics): distMin = self.boxMin - self.coordMin distMax = self.boxMax - self.coordMax for i in xrange(self.dim): - if (distMin[i]>=0. and distMax[i]<=0.): + if (distMin[i]>=0. and distMax[i]<=0.): # the control volume is included inside the local domain tmp_ind[i][0] = distMin[i] // self.step[i] # Down, West, South tmp_ind[i][1] = self.res[i] -2. * self.ghosts[i] + distMax[i] // self.step[i] -1# Up, East, North - if (distMin[i]>=0. and self.boxMin[i]<= self.coordMax[i] and distMax[i]>=0.): + if (distMin[i]>=0. and self.boxMin[i]<= self.coordMax[i] and distMax[i]>=0.): # only min corner of control volume is included inside the local domain tmp_ind[i][0] = distMin[i] // self.step[i] tmp_ind[i][1] = self.res[i] -2. * self.ghosts[i] - 1 - if (distMin[i]<=0. and self.boxMax[i]>= self.coordMin[i] and distMax[i]<=0.) : + if (distMin[i]<=0. and self.boxMax[i]>= self.coordMin[i] and distMax[i]<=0.) : # only max corner of control volume is included inside the local domain tmp_ind[i][1] = self.res[i] -2.*self.ghosts[i] + distMax[i] // self.step[i] -1 - if (distMin[i]<=0. and distMax[i]>=0.): + if (distMin[i]<=0. and distMax[i]>=0.): # the local domain is included inside the control volume tmp_ind[i][1] = self.res[i] - 2. * self.ghosts[i] - 1 @@ -108,7 +106,7 @@ class Compute_forces(Diagnostics): # ind[self.Up] = ind[self.Up] - 1 # ind[self.East] = ind[self.East] - 1 # ind[self.North] = ind[self.North] - 1 - + # Count the number of points on each face and inside the domain nbPoints = np.zeros(2 * self.dim, dtype=PARMES_INTEGER) nbPoints[self.Down] = (ind[self.East] - ind[self.West] + 1) * (ind[self.North] - ind[self.South] + 1) @@ -218,7 +216,7 @@ class Compute_forces(Diagnostics): uinf = 1. self.coef = 2. / (uinf ** 2 * PI * self.obstacle.radius ** 2) - def process(self, ite, t, dt): + def __call__(self, t, dt, ite): """ Computation of the drag according to the "impulsion" formula presented by Noca et. al in Journal of Fluids and Structures, 1999 @@ -261,9 +259,9 @@ class Compute_forces(Diagnostics): def integrateOnBox(self, force, vort, chi, dvol, dt): """ - Return -1/(dim-1)*d/dt int_over_control_box coord X vorticity + Return -1/(dim-1)*d/dt int_over_control_box coord X vorticity """ - fact = - dvol / ((self.dim - 1) * dt) + fact = - dvol / ((self.dim - 1) * dt) int1 = np.zeros(self.dim, dtype=PARMES_REAL) # For all points in the box for ind in xrange (chi.shape[1]): @@ -279,13 +277,13 @@ class Compute_forces(Diagnostics): force = force + fact * (int1 - self.bufferForce) # 1st order time integration self.bufferForce = int1 # Save for next time step ... - return force + return force def integrateOnSurface(self, force, velo, vort, chi, NormalVec, Re, dsurf): """ Compute integrals on surface to calculate forces acting on the body. - See (2.1) of Noca 1999 or (52) of Plouhmans 2002 - Integrals on the obstacle are neglected. + See (2.1) of Noca 1999 or (52) of Plouhmans 2002 + Integrals on the obstacle are neglected. """ fact = 1. / (self.dim - 1) @@ -353,7 +351,7 @@ class Compute_forces(Diagnostics): X_n_DivT[2] = - coords[0] * self.a[i,j,k] - coords[1] * self.b[i,j,k] n_T[0] = - 1. / self.Re * (self.jacobian[2][i,j,k] + self.jacobian[6][i,j,k]) n_T[1] = - 1. / self.Re * (self.jacobian[5][i,j,k] + self.jacobian[7][i,j,k]) - n_T[2] = - 1. / self.Re * 2 * self.jacobian[8][i,j,k] + n_T[2] = - 1. / self.Re * 2 * self.jacobian[8][i,j,k] if (chi.all() == self.chi_north.all()): # normalVec = (0,0,1) X_n_DivT[0] = - coords[2] * self.a[i,j,k] @@ -361,7 +359,7 @@ class Compute_forces(Diagnostics): X_n_DivT[2] = coords[0] * self.a[i,j,k] + coords[1] * self.b[i,j,k] n_T[0] = 1. / self.Re * (self.jacobian[2][i,j,k] + self.jacobian[6][i,j,k]) n_T[1] = 1. / self.Re * (self.jacobian[5][i,j,k] + self.jacobian[7][i,j,k]) - n_T[2] = 1. / self.Re * 2 * self.jacobian[8][i,j,k] + n_T[2] = 1. / self.Re * 2 * self.jacobian[8][i,j,k] int2 = int2 + fact * X_n_DivT + n_T # Product with element of surface and sum to the total (input) force diff --git a/HySoP/hysop/operator/monitors/monitoring.py b/HySoP/hysop/operator/monitors/monitoring.py new file mode 100644 index 0000000000000000000000000000000000000000..ac23e3aaa3e31ae5f1e36757c7077d0dfaa09458 --- /dev/null +++ b/HySoP/hysop/operator/monitors/monitoring.py @@ -0,0 +1,34 @@ +""" +@package parmepy.operator.monitors.monitoring +""" +from abc import ABCMeta, abstractmethod +from parmepy.operator.continuous import Operator + + +class Monitoring(Operator): + """Abstract description a monitor. """ + + __metaclass__ = ABCMeta + + @abstractmethod + def __init__(self, frequency): + """ Constructor + @param dimension integer : domain dimension. + """ + Operator.__init__(self) + ## Monitor frequency + self.freq = frequency + + def setUp(self, *spec): + pass + + def printComputeTime(self): + pass + + +if __name__ == "__main__": + print __doc__ + print "- Provided class : Monitoring (abstract)." + print Monitoring.__doc__ + + diff --git a/HySoP/hysop/tools/printer.py b/HySoP/hysop/operator/monitors/printer.py similarity index 57% rename from HySoP/hysop/tools/printer.py rename to HySoP/hysop/operator/monitors/printer.py index d29e85438207437fd5b678d5b4ff4847d29d9059..1e6c7c1dda56ba039ed1bb34f362fcb700c86caa 100644 --- a/HySoP/hysop/tools/printer.py +++ b/HySoP/hysop/operator/monitors/printer.py @@ -3,34 +3,33 @@ Classes for handling ouputs. """ -from ..constants import * +from parmepy.constants import np, S_DIR +from parmepy.operator.monitors.monitoring import Monitoring import evtk.hl as evtk import time -class Printer(): +class Printer(Monitoring): """ Basic printer. Performs outputs in VTK images. """ - def __init__(self, fields=[], frequency=0, outputPrefix='./out_'): + def __init__(self, frequency=0, fields=[], outputPrefix='./out_'): """ - Create a results printer for given fields, filename prefix (relative path) and an output frequency. + Create a results printer for given fields, filename + prefix (relative path) and an output frequency. @param fields : Fields to output. @param frequency : output file producing frequency. @param outputPrefix : file name prefix, contains relative path. """ - ## Printer frequency - self.freq = frequency + Monitoring.__init__(self, frequency) ## Filename prefix self.outputPrefix = outputPrefix ## Fields to output self.fields = fields - ## Iterations counter - self.ite = 0 ## Method to collect data in case of distributed data self.get_data_method = None if self.freq != 0: @@ -40,6 +39,10 @@ class Printer(): self.step = self._passStep ## Printing compute time self.compute_time = 0. + self.call_number = 0 + + def apply(self, t, dt, ite): + self.step(ite) def _build_vtk_dict(self): """Build a dictionary from fields to VTK image.""" @@ -49,7 +52,8 @@ class Printer(): if f.vector: for d in xrange(len(df.data)): if len(df.data[d].shape) == 2: - res[df.name + S_DIR[d]] = np.expand_dims(df.data[d], axis=2) + res[df.name + S_DIR[d]] = np.expand_dims( + df.data[d], axis=2) else: res[df.name + S_DIR[d]] = df.data[d] else: @@ -64,13 +68,14 @@ class Printer(): if callable(method): self.get_data_method = method else: - raise ValueError("Cannot set non callable method to get data to print. Given method : " + str(method)) + raise ValueError("Cannot set non callable method to get data " + + "to print. Given method : " + str(method)) - def _passStep(self): + def _passStep(self, ite): """A code method that do nothing.""" pass - def _printStep(self): + def _printStep(self, ite): """ Printer core method. @@ -78,54 +83,63 @@ class Printer(): VTK images are writed. If fails, classical ascii output is performed. """ t = time.time() - if (self.ite % self.freq) == 0: + if (ite % self.freq) == 0: + self.call_number += 1 print "== IO" - if self.get_data_method is not None: - for f in self.fields: - for df in f.discreteField: - if not df.contains_data: - print "Data transfer : ", f.name, ":", - self.get_data_method(df) - filename = self.outputPrefix + "results_{0:05d}.dat".format(self.ite) + for f in self.fields: + for df in f.discreteField: + df.toHost() + filename = self.outputPrefix + "results_{0:05d}.dat".format(ite) print "Print file : " + filename ## VTK output \todo: Need fix in 2D, getting an IOError. - try: + if self.fields[0].domain.dimension == 3: evtk.imageToVTK(filename, pointData=self._build_vtk_dict()) - print "not yet implemented" - except IOError: + else: ## Standard output f = open(filename, 'w') - shape = self.fields[0].domain.discreteDomain.resolution - step = self.fields[0].domain.discreteDomain.step + shape = self.fields[0].discreteField[0].topology.mesh.resolution + coords = self.fields[0].discreteField[0].topology.mesh.coords if len(shape) == 2: - for i in xrange(shape[0]-1): - for j in xrange(shape[1]-1): - f.write("{0:8.12} {1:8.12} ".format(i * step[0], j * step[1])) + for i in xrange(shape[0] - 1): + for j in xrange(shape[1] - 1): + f.write("{0:8.12} {1:8.12} ".format( + coords[0][i, 0], coords[1][0, j])) for field in self.fields: if field.vector: - f.write("{0:8.12} {1:8.12} ".format(field.discreteField[0][0][i, j], - field.discreteField[0][1][i, j])) + f.write("{0:8.12} {1:8.12} ".format( + field.discreteField[0][0][i, j], + field.discreteField[0][1][i, j])) else: - f.write("{0:8.12} ".format(field.discreteField[0][i, j])) + f.write("{0:8.12} ".format( + field.discreteField[0][i, j])) f.write("\n") else: - for i in xrange(shape[0]-1): - for j in xrange(shape[1]-1): - for k in xrange(shape[2]-1): - f.write("{0:8.12} {1:8.12} {2:8.12} ".format(i * step[0], j * step[1], k * step[2])) + for i in xrange(shape[0] - 1): + for j in xrange(shape[1] - 1): + for k in xrange(shape[2] - 1): + f.write("{0:8.12} {1:8.12} {2:8.12} ".format( + coords[0][i, 0, 0], + coords[1][0, j, 0], + coords[2][0, 0, k])) for field in self.fields: if field.vector: - f.write("{0:8.12} {1:8.12} {2:8.12} ".format(field.discreteField[0][0][i, j, k], - field.discreteField[0][1][i, j, k], - field.discreteField[0][2][i, j, k])) + f.write("{0:8.12} {1:8.12} {2:8.12} ".format( + field.discreteField[0][0][i, j, k], + field.discreteField[0][1][i, j, k], + field.discreteField[0][2][i, j, k])) else: - f.write("{0:8.12} ".format(field.discreteField[0][i, j, k])) + f.write("{0:8.12} ".format( + field.discreteField[0][i, j, k])) f.write("\n") f.close() print "==\n" - self.ite += 1 self.compute_time += (time.time() - t) + def printComputeTime(self): + self.time_info[0] = "\"IO\" " + self.time_info[1] = str(self.compute_time) + " " + print "IO total time : ", self.compute_time, self.call_number + if __name__ == "__main__": print __doc__ diff --git a/HySoP/hysop/operator/multiphase.py b/HySoP/hysop/operator/multiphase.py index 2756c963c480bf4cc34288267927a64fb444d8b0..20eb5f64e4bc7596f677943250382fefd3311a7f 100644 --- a/HySoP/hysop/operator/multiphase.py +++ b/HySoP/hysop/operator/multiphase.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- """ -@package parmepy.operatorPressure +@package parmepy.operator.multiphase MultiPhase Rot Grad P """ -from continuous import ContinuousOperator +from parmepy.operator.continuous import Operator from stretching_d import Stretching_d -class Pressure(ContinuousOperator): +class Pressure(Operator): """ Pressure operator representation """ @@ -28,7 +28,7 @@ class Pressure(ContinuousOperator): self.viscosity = viscosity self.addVariable([velocity, vorticity, density]) - def discretize(self, topology=None, idVelocityD=0, idVorticityD=0, idDensityD=0): + def setUp(self, resolutions=None, idVelocityD=0, idVorticityD=0, idDensityD=0): """ Pressure operator discretization method. Create a discrete Pressure operator from given specifications. @@ -39,8 +39,11 @@ class Pressure(ContinuousOperator): @param propertyOp : choice of the property garantied by the stretching Op ( divConservation or massConservation) @param method : the method to use. (Euler, RK, ..) default : RK3 """ - if self.discreteOperator is None: - self.discreteOperator = Pressure_d(self, topology, idVelocityD, idVorticityD, idDensityD) + #compute topologies here + self.topology = None + self.discreteOperator = Pressure_d(self, idVelocityD, idVorticityD, idDensityD) + self.discreteOperator.setUp(self.topology) + def __str__(self): """ToString method""" diff --git a/HySoP/hysop/operator/multiphase_d.py b/HySoP/hysop/operator/multiphase_d.py index 7a83dacc00aa4e304a267a4269330cf4ad9bb550..2a685b4e27847b2232024adf132ac6556cd50959 100644 --- a/HySoP/hysop/operator/multiphase_d.py +++ b/HySoP/hysop/operator/multiphase_d.py @@ -3,10 +3,10 @@ @package operator Discrete MultiPhase Rot Grad P """ -from discrete import DiscreteOperator -from differentialOperator import DifferentialOperator -from differentialOperator_d import DifferentialOperator_d -import numpy as np +from parmepy.operator.discrete import DiscreteOperator +from parmepy.operator.differentialOperator import DifferentialOperator +from parmepy.operator.differentialOperator_d import DifferentialOperator_d +from parmepy.constants import np import numpy.testing as npt import time import sys @@ -16,7 +16,8 @@ class Pressure_d(DiscreteOperator): Operator representation. """ - def __init__(self,pressureOp , topology=None, IdVelocityD=0, IdVorticityD=0, IdDensityD=0 ): + def __init__(self,pressureOp, IdVelocityD=0, + IdVorticityD=0, IdDensityD=0 ): """ Constructor. Create the old velocity field to compute the -Dp/rho @@ -24,20 +25,22 @@ class Pressure_d(DiscreteOperator): @param operator. """ DiscreteOperator.__init__(self) + self.velocity_old = np.copy(pressureOp.velocity.discreteField[IdVelocityD].data) + self.velocity_new = pressureOp.velocity.discreteField[IdVelocityD] + self.vorticity = pressureOp.vorticity.discreteField[IdVorticityD] + self.density = pressureOp.density.discreteField[IdDensityD] + self.compute_time = 0. + + def setUp(self, method='', topology=None): ## Topology of the obstacle if(topology is not None): self.topology = topology else: raise NotImplementedError() - self.velocity_old = np.copy(pressureOp.velocity.discreteField[IdVelocityD].data) - self.velocity_new = pressureOp.velocity.discreteField[IdVelocityD] - self.vorticity = pressureOp.vorticity.discreteField[IdVorticityD] - self.density = pressureOp.density.discreteField[IdDensityD] self.resolution = self.topology.mesh.resolution self.meshSize = self.topology.mesh.size - self.compute_time = 0. - def apply(self, t, dt): + def apply(self, t, dt, ite): """ Apply operator. """ @@ -61,7 +64,7 @@ class Pressure_d(DiscreteOperator): self.timings_info[0] = "\"Pressure calculation total\"" # self.timings_info[1] = str(self.total_time) # print "Multiphase total time : ", self.total_time - print "Time of the last Dp/rho calculation loop :", self.compute_time + print "Time of the last Dp/rho calculation loop :", self.compute_time def __str__(self): s = "Pressure_d (DiscreteOperator). " + DiscreteOperator.__str__(self) diff --git a/HySoP/hysop/operator/particle_advection.py b/HySoP/hysop/operator/particle_advection.py new file mode 100644 index 0000000000000000000000000000000000000000..3f58ecee3f23fe13ed6b21d0289efb42c17e70d9 --- /dev/null +++ b/HySoP/hysop/operator/particle_advection.py @@ -0,0 +1,140 @@ +""" +@package parmepy.operator.particle_advection + +Discrete advection representation +""" +from parmepy.constants import np +from parmepy.operator.discrete import DiscreteOperator +from parmepy.fields.continuous import Field +from parmepy.gpu import PARMES_REAL_GPU +from parmepy.numerics.interpolation import Linear +from parmepy.numerics.splitting import Splitting +import time + + +class ParticleAdvection(DiscreteOperator): + """ + Particle advection operator representation. + + """ + + def __init__(self, advec, idVelocityD=0, idScalarD=0): + """ + Create a Advection operator. + Work on a given scalar at a given velocity to produce scalar + distribution at new positions. + + @param advec : Transport operator + @param idVelocityD : Index of velocity discretisation to use. + @param idScalarD : Index of scalar discretisation to use. + @param result_position : result position. + @param result_scalar : result scalar. + @param method : the method to use. + """ + DiscreteOperator.__init__(self) + self.idVelocityD = idVelocityD + self.idScalarD = idScalarD + self.advec = advec + ## Velocity. + self.velocity = advec.velocity.discreteField[idVelocityD] + ## Transported scalar. + self.scalar = advec.scalar.discreteField[idScalarD] + self.variables = [self.velocity, self.scalar] + self.input = [self.velocity, self.scalar] + self.output = [self.scalar] + self.method = None + ## Compute time detailed per directions + self.compute_time = [0., 0., 0.] + ## Compute time for copy detailed per directions + self.compute_time_copy = [0., 0., 0.] + ## Compute time for transposition detailed per directions + self.compute_time_swap = [0., 0., 0.] + self.call_number = [0, 0, 0] + self.name = "advection_default" + self.num_method = None + + def setUp(self, method=''): + ## Result position + particle_position = Field(self.advec.scalar.domain, + "Particle_Position", + vector=False) + particle_position.discretize(self.scalar.topology) + self.part_position = particle_position.discreteField[0] + ## Result scalar + particle_scalar = Field(self.advec.scalar.domain, + "Particle_Scalar", + vector=False) + particle_scalar.discretize(self.scalar.topology) + self.part_scalar = particle_scalar.discreteField[0] + self.method = method + if self.method.find('rk2') >= 0: + self.name += '_rk2' + from parmepy.numerics.integrators.runge_kutta2 import RK2 + self.num_advec = RK2() + else: + self.name += '_rk2' + from parmepy.numerics.integrators.runge_kutta2 import RK2 + self.num_advec = RK2() + if self.method.find('m6prime') >= 0: + self.name += '_m6prime' + from parmepy.numerics.remeshing.m6prime import M6Prime + self.num_remesh = M6Prime(self.scalar.dimension) + else: + self.name += '_m6prime' + from parmepy.numerics.remeshing.m6prime import M6Prime + self.num_remesh = M6Prime(self.scalar.dimension) + self.interpolation = [Linear(self.velocity[d]) + for d in xrange(self.scalar.dimension)] + self.numMethod = Splitting(self._apply_in_dir, self.scalar.dimension) + print "Method used:", self.name + + def _apply_in_dir(self, t, dt, d, split_id): + """ + Apply advection operator. + + @param t : current time. + @param dt : time step. + @param d : Direction of splitting. + + Advection algorithm: + @li 1. Particle initialization : \n + - by copy scalar from grid to particles if previous splitting + direction equals current splitting direction.\n + - by transposition of scalar from grid to particle. + @li 2. Particle advection :\n + - compute particle position in splitting direction as a + scalar. Performs a RK2 resolution of dx_p/dt = a_p. + @li 3. Profile timings of OpenCL kernels. + """ + self.part_scalar[...] = self.scalar[...] + self.part_position[...] = self.scalar.topology.mesh.coords[d] + self.part_position[...] = self.num_advec(self.interpolation[d], + t, dt, + self.part_position.data) + self.scalar[...] = self.num_remesh(self.part_position.data, + self.part_scalar.data, + d) + + def apply(self, t, dt, ite): + self.numMethod(t, dt) + + + def printComputeTime(self): + pass + # self.timings_info[0] = "\"Advection total\" \"copy\" \"swap xy\" \"swap xz\" \"Advection x\" \"Advection y\" \"Advection z\" " + # self.timings_info[1] = str(self.total_time) + " " + str(self.compute_time_copy[0]) + " " + # self.timings_info[1] += str(self.compute_time_swap[0]) + " " + str(self.compute_time_swap[1]) + " " + # self.timings_info[1] += str(self.compute_time[0]) + " " + str(self.compute_time[1]) + " " + str(self.compute_time[2]) + " " + # print "Advection total time : ", self.total_time, self.call_number + # print "\t Advection init copy :", self.compute_time_copy, self.init_copy.call_number + # print "\t Advection init swap :", self.compute_time_swap, self.init_transpose.call_number + # print "\t Advection :", self.compute_time, self.numMethod.call_number + + def __str__(self): + s = "Advection_P (DiscreteOperator). " + DiscreteOperator.__str__(self) + return s + +if __name__ == "__main__": + print __doc__ + print "- Provided class : Transport_d" + print AdvectionDOp.__doc__ diff --git a/HySoP/hysop/operator/penalization.py b/HySoP/hysop/operator/penalization.py index d667256b1faa9937f2a74dc606dfa7f80a5c4472..16f490e6c64108023fc8e25fd3794420532e41d3 100644 --- a/HySoP/hysop/operator/penalization.py +++ b/HySoP/hysop/operator/penalization.py @@ -4,11 +4,11 @@ Penalization operator representation """ -from continuous import ContinuousOperator -from penalization_d import Penalization_d +from parmepy.operator.continuous import Operator +from parmepy.operator.penalization_d import Penalization_d -class Penalization(ContinuousOperator): +class Penalization(Operator): """ Penalization operator representation """ @@ -31,14 +31,14 @@ class Penalization(ContinuousOperator): self.lambd = lambd self.addVariable([velocity, vorticity]) - def discretize(self, idVelocityD=0, idVorticityD=0, idObstacleD=0): + def setUp(self, idVelocityD=0, idVorticityD=0, idObstacleD=0): """ Penalization operator discretization method. Create a PenalizationDOp.PenalizationDOp from given specifications. """ - if self.discreteOperator is None: - self.discreteOperator = Penalization_d(self, idVelocityD, idVorticityD, idObstacleD) + self.discreteOperator = Penalization_d(self, idVelocityD, idVorticityD, idObstacleD) + self.discreteOperator.setUp() def __str__(self): """ToString method""" diff --git a/HySoP/hysop/operator/penalization_d.py b/HySoP/hysop/operator/penalization_d.py index 4b7bb9d110fb31f6560426c52b43d9eeb35dafe3..8341bf7010c46d3460174e7c405eb1ea1ea8af26 100644 --- a/HySoP/hysop/operator/penalization_d.py +++ b/HySoP/hysop/operator/penalization_d.py @@ -3,11 +3,10 @@ @package operator Discrete penalization representation """ -from discrete import DiscreteOperator -from differentialOperator_d import DifferentialOperator_d -from parmepy.physics.compute_forces import Compute_forces -import pyopencl as cl -import numpy as np +from parmepy.operator.discrete import DiscreteOperator +from parmepy.operator.differentialOperator_d import DifferentialOperator_d +#from parmepy.physics.compute_forces import Compute_forces +from parmepy.constants import np import time @@ -45,7 +44,10 @@ class Penalization_d(DiscreteOperator): # if (self.topology.rank == 0): # self.f = open('./res/NocaForces.dat', 'w') - def apply(self, t, dt): + def setUp(self, method=''): + pass + + def apply(self, t, dt, ite): self.compute_time = time.time() @@ -87,7 +89,7 @@ class Penalization_d(DiscreteOperator): self.timings_info[0] = "\"Penalization total\"" self.timings_info[1] = str(self.total_time) print "Penalization total time : ", self.total_time - print "Time of the last Penalization iteration :", self.compute_time + print "Time of the last Penalization iteration :", self.compute_time def __str__(self): s = "Penalization_d (DiscreteOperator). " + DiscreteOperator.__str__(self) diff --git a/HySoP/hysop/operator/poisson.py b/HySoP/hysop/operator/poisson.py index 9209beee4fdb76690a8240acb271480dcad77810..76581f494bda369af101bfc33a1236fd6917e96f 100644 --- a/HySoP/hysop/operator/poisson.py +++ b/HySoP/hysop/operator/poisson.py @@ -3,11 +3,11 @@ @package parmepy.operator Poisson solver Continous Poisson solver operator (F2PY) """ -from continuous import ContinuousOperator -from poisson_d import Poisson_d +from parmepy.operator.continuous import Operator +from parmepy.operator.poisson_fft import PoissonFFT -class Poisson(ContinuousOperator): +class Poisson(Operator): """ Poisson solver operator representation using FFT """ @@ -25,7 +25,8 @@ class Poisson(ContinuousOperator): self.vorticity = vorticity self.addVariable([velocity, vorticity]) - def discretize(self, idVelocityD=0, idVorticityD=0, topology=None): + def discretize(self, resolutions=None, method='', + idVelocityD=0, idVorticityD=0,): """ Poisson solver operator discretization method. Create a discrete Poisson solver operator from given specifications. @@ -34,8 +35,10 @@ class Poisson(ContinuousOperator): @param idVelocityD : Index of vorticity. @param topology : topology of the input fields """ - if self.discreteOperator is None: - self.discreteOperator = Poisson_d(self, idVelocityD, idVorticityD, topology) + #create topology here + topology=None + self.discreteOperator = Poisson_d(self, idVelocityD, idVorticityD) + self.discreteOperator.setUp(topology) def __str__(self): """ToString method""" diff --git a/HySoP/hysop/operator/poisson_d.py b/HySoP/hysop/operator/poisson_fft.py similarity index 94% rename from HySoP/hysop/operator/poisson_d.py rename to HySoP/hysop/operator/poisson_fft.py index 39774764250442665c4ee015e964994688b852f0..4fcdd70f633d2e13820bbf234a54cd82958032a9 100644 --- a/HySoP/hysop/operator/poisson_d.py +++ b/HySoP/hysop/operator/poisson_fft.py @@ -4,18 +4,17 @@ Discrete Poisson solver operator using FFT """ from parmepy.f2py import fftw2py -from discrete import DiscreteOperator -from parmepy.constants import ORDER, PARMES_REAL -import numpy as np +from parmepy.operator.discrete import DiscreteOperator +from parmepy.constants import np, ORDER, PARMES_REAL import time -class Poisson_d(DiscreteOperator): +class PoissonFFT(DiscreteOperator): """ Operator representation. """ - def __init__(self, poisson, idVelocityD=0, idVorticityD=0, topology=None): + def __init__(self, poisson, idVelocityD=0, idVorticityD=0): """ Constructor. @param velocity : descretized velocity to update from vorticity. @@ -25,14 +24,16 @@ class Poisson_d(DiscreteOperator): self.poisson = poisson self.velocity = poisson.velocity.discreteField[idVelocityD] self.vorticity = poisson.vorticity.discreteField[idVorticityD] + self.compute_time = 0. + + def setUp(self, method='', topology=None): self.topology = topology self.ghosts = topology.ghosts self.resolution = topology.mesh.resolution self.dim = topology.dim self.coords = topology.mesh.coords - self.compute_time = 0. - def apply(self, t, dt): + def apply(self, t, dt, ite): """ Apply operator. """ @@ -80,7 +81,7 @@ class Poisson_d(DiscreteOperator): # computation of the current flow rate: int_y int_z uX dydz debX = 0. - debX = np.sum(self.velocity[0][0,ind1a:ind1b, ind2a:ind2b]) + debX = np.sum(self.velocity[0][0,ind1a:ind1b, ind2a:ind2b]) debX = debX * self.topology.mesh.size[1] * self.topology.mesh.size[2] self.velocity[0][ind0a:ind0b, ind1a:ind1b, ind2a:ind2b] = \ @@ -91,7 +92,7 @@ class Poisson_d(DiscreteOperator): # computation of the current flow rate: int_y int_z uY dydz debY = 0. - debY = np.sum(self.velocity[1][0,ind1a:ind1b, ind2a:ind2b]) + debY = np.sum(self.velocity[1][0,ind1a:ind1b, ind2a:ind2b]) debY = debY * self.topology.mesh.size[1] * self.topology.mesh.size[2] # computation of omega_z circulation @@ -109,7 +110,7 @@ class Poisson_d(DiscreteOperator): # computation of the current flow rate: int_y int_z uZ dydz debZ = 0. - debZ = np.sum(self.velocity[2][0,ind1a:ind1b, ind2a:ind2b]) + debZ = np.sum(self.velocity[2][0,ind1a:ind1b, ind2a:ind2b]) debZ = debZ * self.topology.mesh.size[1] * self.topology.mesh.size[2] # computation of omega_y circulation @@ -134,7 +135,7 @@ class Poisson_d(DiscreteOperator): self.timings_info[0] = "\"Poisson solver total\"" self.timings_info[1] = str(self.total_time) # print "Poisson total time : ", self.total_time - print "Time of the last Poisson solver loop :", self.compute_time + print "Time of the last Poisson solver loop :", self.compute_time def __str__(self): s = "Poisson_d (DiscreteOperator). " + DiscreteOperator.__str__(self) diff --git a/HySoP/hysop/operator/remeshing.py b/HySoP/hysop/operator/remeshing.py index 59d9aa7db469d9c318b4549768cdf03e83be5ee5..7d6c9e90fd15fd011660c9bc1f5645c6241e9c3c 100644 --- a/HySoP/hysop/operator/remeshing.py +++ b/HySoP/hysop/operator/remeshing.py @@ -3,9 +3,9 @@ Discrete remeshing representation """ -from .discrete import DiscreteOperator -from ..constants import * -import pyopencl as cl +from parmepy.operator.discrete import DiscreteOperator +from parmepy.constants import np +from parmepy.gpu import PARMES_REAL_GPU import time @@ -60,8 +60,8 @@ class Remeshing(DiscreteOperator): evt = self.numMethod.launch(self.ppos.gpu_data, self.pscal.gpu_data, self.res_scalar.gpu_data, - self.gpu_precision(self.res_scalar.domain.origin[splittingDirection]), - self.gpu_precision(self.res_scalar.domain.step[splittingDirection])) + self.gpu_precision(self.res_scalar.topology.mesh.origin[splittingDirection]), + self.gpu_precision(self.res_scalar.topology.mesh.size[splittingDirection])) for df in self.output: df.contains_data = False # Get timpings from OpenCL events diff --git a/HySoP/hysop/operator/advec_scales_d.py b/HySoP/hysop/operator/scales_advection.py similarity index 90% rename from HySoP/hysop/operator/advec_scales_d.py rename to HySoP/hysop/operator/scales_advection.py index e3ade527e8ffcc1f6c1526dada8a6a0337083329..04e753ca668f11935023905dc34e20f28cd979a1 100644 --- a/HySoP/hysop/operator/advec_scales_d.py +++ b/HySoP/hysop/operator/scales_advection.py @@ -1,22 +1,20 @@ # -*- coding: utf-8 -*- """ -@package operator +@package parmepy.operator.scales_advection Discrete Advection operator using scales code (Jean-Baptiste) """ from parmepy.f2py import scales2py -from discrete import DiscreteOperator -from parmepy.constants import ORDER, PARMES_REAL -import numpy as np +from parmepy.operator.discrete import DiscreteOperator +from parmepy.constants import np, ORDER, PARMES_REAL import time -class Advec_scales_d(DiscreteOperator): +class ScalesAdvection(DiscreteOperator): """ Operator representation. """ - def __init__(self, advec, idVelocityD=0, idAdvectedFieldD=0, - idAddFieldD=0, stab_coeff=None, topology=None): + def __init__(self, advec, idVelocityD=0, idAdvectedFieldD=0): """ Constructor. @param stab_coeff : stability coefficient @@ -28,16 +26,24 @@ class Advec_scales_d(DiscreteOperator): self.velocity = advec.velocity.discreteField[idVelocityD] self.advectedField = \ advec.advectedField.discreteField[idAdvectedFieldD] + self.variables = [self.velocity, self.advectedField] + self.input = [self.velocity] + self.output = [self.advectedField] + self.compute_time = 0. + + def setUp(self, method='', + idAddFieldD=0, stab_coeff=None, topology=None): self.addField = \ advec.addField.discreteField[idAddFieldD] + self.variables.append(self.addField) + self.output.append(self.addField) self.stab_coeff = stab_coeff self.topology = topology self.ghosts = topology.ghosts self.resolution = topology.mesh.resolution self.dim = topology.dim - self.compute_time = 0. - def apply(self, t, dt): + def apply(self, t, dt, ite): """ Apply operator. """ diff --git a/HySoP/hysop/operator/splitting.py b/HySoP/hysop/operator/splitting.py deleted file mode 100644 index 50fe86aba0ad7f318ab28a4765f12dc18216ad35..0000000000000000000000000000000000000000 --- a/HySoP/hysop/operator/splitting.py +++ /dev/null @@ -1,93 +0,0 @@ -""" -@package parmepy.operator.splitting - -Splitting operator representation -""" -from .discrete import DiscreteOperator -import time - - -class Splitting(DiscreteOperator): - """ - Splitting operator representation. - - Operator of operators applying in a Strang splitting. - - Implements a 2nd order splitting in 3D: - @li X-dir, half time step - @li Y-dir, half time step - @li Z-dir, full time step - @li Y-dir, half time step - @li X-dir, half time step - \n - Implements a 2nd order splitting in 2D: - @li X-dir, half time step - @li Y-dir, full time step - @li X-dir, half time step - """ - - def __init__(self, operators=[], dim=3): - """ - Create a Splitting operator on a given list of operators and a dimension. - - @param operators : list of operators to split. - @param dim : problem dimension. - """ - DiscreteOperator.__init__(self) - ## Operators to split - self.operators = operators - ## Splitting at 2nd order. - self.splitting = [] - ## Half timestep in all directions - # [self.splitting.append((i, 0.5)) for i in xrange(dim)] - # [self.splitting.append((dim - 1 - i, 0.5)) for i in xrange(dim)] - ## Half timestep in all directions but last - [self.splitting.append((i, 0.5)) for i in xrange(dim - 1)] - self.splitting.append((dim - 1, 1.)) - [self.splitting.append((dim - 2 - i, 0.5)) for i in xrange(dim - 1)] - ## Compute time detailed per directions and per operators - self.compute_time_details = [[0. for i in xrange(dim)] for op in self.operators] - self.name = "splitting" - - def apply(self, t, dt): - """ - Apply Remeshing operator. - - @param t : current time. - @param dt : time step. - """ - c_time = 0. - for split in self.splitting: - #print "direction : " + str(split[0]) + " dt*" + str(split[1]) - op_index = 0 - for op in self.operators: - temp_time = op.apply(t, split[1] * dt, split[0]) - c_time += temp_time - self.compute_time_details[op_index][split[0]] += temp_time - op_index += 1 - self.total_time += c_time - return c_time - - def printComputeTime(self): - print "Splitting total time : ", self.total_time - self.timings_info[0] = "\"Splitting total\" " - self.timings_info[1] = str(self.total_time) + " " - for op in self.operators: - print " ", - op.printComputeTime() - self.timings_info[0] += op.timings_info[0] - self.timings_info[1] += op.timings_info[1] - - def __str__(self): - s = "Splitting (DiscreteOperator). Splitting steps : \n" - for split in self.splitting: - s += "direction : " + str(split[0]) + " dt=" + str(split[1]) + "\n" - s += "Concerned operators : \n" - for op in self.operators: - s += str(op) - return s - -if __name__ == "__main__": - print __doc__ - print "- Provided class : Splitting" - print RemeshingDOp.__doc__ diff --git a/HySoP/hysop/operator/stretching.py b/HySoP/hysop/operator/stretching.py index d26922c8d4cbdd7ec9396abe28ce095e2b3447d5..a2cb5946441ef60ea41f60e439d16f7cdfd53187 100755 --- a/HySoP/hysop/operator/stretching.py +++ b/HySoP/hysop/operator/stretching.py @@ -1,17 +1,16 @@ # -*- coding: utf-8 -*- """ -@package parmepy.operator.Stretching +@package parmepy.operator.stretching Stretching operator representation """ -from continuous import ContinuousOperator -from stretching_d import Stretching_d -from parmepy.integrator import RK3 +from parmepy.operator.continuous import Operator +from parmepy.operator.stretching_d import Stretching_d -class Stretching(ContinuousOperator): +class Stretching(Operator): """ Stretching operator representation """ @@ -32,8 +31,8 @@ class Stretching(ContinuousOperator): self.vorticity = vorticity self.addVariable([velocity, vorticity]) - def discretize(self, topology=None, idVelocityD=0, - idVorticityD=0, propertyOp='divConservation', method=RK3): + def setUp(self, resolutions=None, method='RK3', + idVelocityD=0, idVorticityD=0, propertyOp='divConservation'): """ Stretching operator discretization method. Create a discrete Stretching operator from given specifications. @@ -44,10 +43,11 @@ class Stretching(ContinuousOperator): the stretching Op ( divConservation or massConservation) @param method : the method to use. (Euler, RK, ..) default : RK3 """ - if self.discreteOperator is None: - self.discreteOperator = Stretching_d(self, topology, idVelocityD, - idVorticityD, propertyOp, - method) + #create topology here + topology = None + self.discreteOperator = Stretching_d(self, idVelocityD, + idVorticityD) + self.discreteOperator.setUp(method, topology, propertyOp) def __str__(self): """ToString method""" diff --git a/HySoP/hysop/operator/stretching_d.py b/HySoP/hysop/operator/stretching_d.py index b4af77e31948f9b921a8127951467eeae2825e4e..ff6bbe3ae6780dd8d32ade4d810698e71f816837 100755 --- a/HySoP/hysop/operator/stretching_d.py +++ b/HySoP/hysop/operator/stretching_d.py @@ -5,10 +5,13 @@ Discrete stretching representation """ -from discrete import DiscreteOperator -from differentialOperator_d import DifferentialOperator_d +from parmepy.operator.discrete import DiscreteOperator +from parmepy.operator.differentialOperator_d import DifferentialOperator_d from fct2op import Fct2Op -import parmepy.integrator as integrator +from parmepy.numerics.integrators.euler import Euler +from parmepy.numerics.integrators.runge_kutta2 import RK2 +from parmepy.numerics.integrators.runge_kutta3 import RK3 +from parmepy.numerics.integrators.runge_kutta4 import RK4 import time @@ -18,8 +21,7 @@ class Stretching_d(DiscreteOperator): DiscreteOperator.DiscreteOperator specialization. """ - def __init__(self, stretch, topology=None, idVelocityD=0, idVorticityD=0, - propertyOp='divConservation', method=None): + def __init__(self, stretch, idVelocityD=0, idVorticityD=0): """ Constructor. Create a Stretching operator on a given continuous domain. @@ -35,8 +37,12 @@ class Stretching_d(DiscreteOperator): self.vorticity = stretch.vorticity.discreteField[idVorticityD] # ## input fields # self.input = [self.velocity, self.vorticity] + + def setUp(self, topology=None, propertyOp='divConservation', + method=None): self.propertyOp = propertyOp - self.method = method + if method == 'RK3': + self.method = RK3 ## self.name = "stretching" self.topology = topology @@ -110,16 +116,16 @@ class Stretching_d(DiscreteOperator): def stabilityTest(self, dt, maxi, method): dtstretch = dt cststretch = 0. - if method == integrator.Euler: + if isinstance(method, Euler): cststretch = 2.0 dtstretch = min(dtstretch, cststretch / maxi) - if method == integrator.RK2: + if isinstance(method, RK2): cststretch = 2.0 dtstretch = min(dtstretch, cststretch / maxi) - if method == integrator.RK3: + if isinstance(method, RK3): cststretch = 2.5127 dtstretch = min(dtstretch, cststretch / maxi) - if method == integrator.RK4: + if isinstance(method, RK4): cststretch = 2.7853 dtstretch = min(dtstretch, cststretch / maxi) if dt == dtstretch: diff --git a/HySoP/hysop/operator/tests/__init__.py b/HySoP/hysop/operator/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..430a206ea3619dd4a0923cf131408ed6e1825a44 --- /dev/null +++ b/HySoP/hysop/operator/tests/__init__.py @@ -0,0 +1,5 @@ +""" +@package parmepy.operator + +Everything concerning operators. +""" diff --git a/HySoP/hysop/operator/tests/test_advection.py b/HySoP/hysop/operator/tests/test_advection.py new file mode 100644 index 0000000000000000000000000000000000000000..bcdeb201136e7b1f4a43ac934c65425594203596 --- /dev/null +++ b/HySoP/hysop/operator/tests/test_advection.py @@ -0,0 +1,21 @@ +""" +Testing parmepy.operator.advection +""" +import parmepy as pp +from parmepy.operator.advection import Advection +from parmepy.fields.continuous import Field + + +def test_setUp(): + dom = pp.box.Box() + resolTopo = [33, 33, 17] + csf = Field(dom) + cvf = Field(dom, vector=True) + advec = Advection(cvf, csf) + advec.setUp(resolutions={csf: [33, 33, 17], + cvf: [33, 33, 17]}, + method='') + # advec = Advection(cvf, csf) + # advec.setUp(resolutions={csf: [33, 33, 17], + # cvf: [33, 33, 17]}, + # method='gpu_2k_rk2_m6prime') diff --git a/HySoP/hysop/operator/tests/test_particle_advection.py b/HySoP/hysop/operator/tests/test_particle_advection.py new file mode 100644 index 0000000000000000000000000000000000000000..6706b7599c4aa6f0482d6d41f0398d7cd90ec50a --- /dev/null +++ b/HySoP/hysop/operator/tests/test_particle_advection.py @@ -0,0 +1,21 @@ +""" +Testing parmepy.operator.particle_advection +""" +from parmepy.domain.box import Box +from parmepy.fields.analytical import AnalyticalField +from parmepy.operator.advection import Advection +import pytest + + +def test_null_velocity(): + dom = Box(dimension=2, origin=[0., 0.], length=[1., 1.]) + resolTopo = [33, 33] + csf = AnalyticalField(dom, formula=lambda x, y: 1.) + cvf = AnalyticalField(dom, vector=True, formula=lambda x, y: (0., 0.)) + advec = Advection(cvf, csf) + advec.setUp(resolutions={csf: [33, 33], + cvf: [33, 33]}, + method='') + pytest.raises(ValueError, advec.apply, 0., 1., 1) +# advec.apply(0., 1., 1) + diff --git a/HySoP/hysop/operator/transport.py b/HySoP/hysop/operator/transport.py deleted file mode 100644 index b130522ccd68df9fc098920dcc8df3d4a663337d..0000000000000000000000000000000000000000 --- a/HySoP/hysop/operator/transport.py +++ /dev/null @@ -1,62 +0,0 @@ -""" -@package parmepy.operator.transport - -Transport operator representation. -""" -from .continuous import ContinuousOperator -from .transport_d import Transport_d -from .advec_and_remesh_d import Advec_and_Remesh_d - - -class Transport(ContinuousOperator): - """ - Transport operator representation. - """ - - def __init__(self, velocity, scalar, include_remesh=False): - """ - Create an Transport operator from given variables velocity and scalar. - - @param velocity : velocity variable. - @param scalar : scalar variable. - """ - ContinuousOperator.__init__(self) - ## Transport velocity - self.velocity = velocity - ## Transported scalar - self.scalar = scalar - self.needSplitting = True - self.addVariable([velocity, scalar]) - self.include_remesh = include_remesh - - def discretize(self, idVelocityD=0, idScalarD=0, result_position=None, result_scalar=None, method=None): - """ - Transport operator discretization method. - Create an discrete Transport operator from given specifications. - - @param idVelocityD : Index of velocity discretisation to use. - @param idScalarD : Index of scalar discretisation to use. - @param result_position : result position. - @param result_scalar : result scalar. - @param method : the method to use. - """ - if self.discreteOperator is None: - if self.include_remesh: - self.discreteOperator = Advec_and_Remesh_d(self, idVelocityD, idScalarD, result_scalar, method) - else: - self.discreteOperator = Transport_d(self, idVelocityD, idScalarD, result_position, result_scalar, method) - - def __str__(self): - """ToString method""" - s = "Advection operator (ContinuousOperator)" - if self.discreteOperator is not None: - s += " with the following discretization:\n" - s += str(self.discreteOperator) - else: - s += ". Not discretised" - return s + "\n" - -if __name__ == "__main__": - print __doc__ - print "- Provided class : Advection" - print Advection.__doc__ diff --git a/HySoP/hysop/operator/transport_d.py b/HySoP/hysop/operator/transport_d.py index c1dd96a4b1248c88657b68e214a9fe53a09ccf57..df8e385e53e666bd6bd40e3615b1fc5038946e39 100644 --- a/HySoP/hysop/operator/transport_d.py +++ b/HySoP/hysop/operator/transport_d.py @@ -3,9 +3,9 @@ Discrete transport representation """ -from ..constants import * -from .discrete import DiscreteOperator -import pyopencl as cl +from parmepy.constants import np +from parmepy.operator.discrete import DiscreteOperator +from parmepy.gpu import PARMES_REAL_GPU import time @@ -15,10 +15,12 @@ class Transport_d(DiscreteOperator): """ - def __init__(self, advec, idVelocityD=0, idScalarD=0, result_position=None, result_scalar=None, method=None): + def __init__(self, advec, idVelocityD=0, idScalarD=0, + result_position=None, result_scalar=None, method=None): """ Create a Advection operator. - Work on a given scalar at a given velocity to produce scalar distribution at new positions. + Work on a given scalar at a given velocity to produce scalar + distribution at new positions. @param advec : Transport operator @param idVelocityD : Index of velocity discretisation to use. @@ -61,10 +63,12 @@ class Transport_d(DiscreteOperator): Advection algorithm: @li 1. Particle initialization : \n - - by copy scalar from grid to particles if previous splitting direction equals current splitting direction.\n + - by copy scalar from grid to particles if previous splitting + direction equals current splitting direction.\n - by transposition of scalar from grid to particle. @li 2. Particle advection :\n - - compute particle position in splitting direction as a scalar. Performs a RK2 resolution of dx_p/dt = a_p. + - compute particle position in splitting direction as a + scalar. Performs a RK2 resolution of dx_p/dt = a_p. @li 3. Profile timings of OpenCL kernels. """ c_time, c_time_init = 0., 0. @@ -75,7 +79,7 @@ class Transport_d(DiscreteOperator): evt_init = self.init_copy.launch(self.scalar.gpu_data, self.res_scalar.gpu_data) else: - if self.scalar.domain.dimension == 2: + if self.scalar.topology.dim == 2: evt_init = self.init_transpose.launch(self.scalar.gpu_data, self.res_scalar.gpu_data) else: @@ -91,8 +95,8 @@ class Transport_d(DiscreteOperator): evt = self.numMethod.launch(self.velocity.gpu_data[splittingDirection], self.res_position.gpu_data, self.gpu_precision(dt), - self.gpu_precision(self.scalar.domain.origin[splittingDirection]), - self.gpu_precision(self.scalar.domain.step[splittingDirection])) + self.gpu_precision(self.scalar.topology.mesh.origin[splittingDirection]), + self.gpu_precision(self.scalar.topology.mesh.size[splittingDirection])) for df in self.output: df.contains_data = False # Get timpings from OpenCL events diff --git a/HySoP/hysop/operator/velocity.py b/HySoP/hysop/operator/velocity.py index 00e9c7e2ac314f1fe42897df56c68c0efbfe0dd1..1db3f034f0b4a3ab9fd4d8932e7337b88c266b92 100644 --- a/HySoP/hysop/operator/velocity.py +++ b/HySoP/hysop/operator/velocity.py @@ -3,11 +3,12 @@ Velocity operator representation. """ -from .continuous import ContinuousOperator -from .velocity_d import Velocity_P +from parmepy.operator.continuous import Operator +from parmepy.mpi.topology import Cartesian +from parmepy.operator.velocity_d import Velocity_P -class Velocity(ContinuousOperator): +class Velocity(Operator): """ Velocity operator representation. """ @@ -23,7 +24,7 @@ class Velocity(ContinuousOperator): self.velocity = velocity self.addVariable([velocity]) - def discretize(self, idVelocityD=0, method=None): + def setUp(self, resolutions=None, method=''): """ Advection operator discretization method. Create an AdvectionDOp.AdvectionDOp from given specifications. @@ -31,8 +32,12 @@ class Velocity(ContinuousOperator): @param idVelocityD : Index of velocity discretisation to use. @param method : the method to use. """ - if self.discreteOperator is None: - self.discreteOperator = Velocity_P(self, idVelocityD, method) + topo = Cartesian(self.velocity.domain, + self.velocity.domain.dimension, + resolutions[self.velocity]) + self.velocity.discretize(topo) + self.discreteOperator = Velocity_P(self, idVelocityD) + self.discreteOperator.setUp(method) def __str__(self): """ToString method""" diff --git a/HySoP/hysop/operator/velocity_d.py b/HySoP/hysop/operator/velocity_d.py index 72592f36a99aa13f4eaf16124deb5d7c370485a9..886261aa70f2917465d6edc2eb888ec35f37585c 100644 --- a/HySoP/hysop/operator/velocity_d.py +++ b/HySoP/hysop/operator/velocity_d.py @@ -3,9 +3,9 @@ Velocity operator representation. """ -from ..constants import * -from .discrete import DiscreteOperator -import pyopencl as cl +from parmepy.constants import np +from parmepy.gpu import cl +from parmepy.operator.discrete import DiscreteOperator import time @@ -14,7 +14,7 @@ class Velocity_P(DiscreteOperator): Velocity computation from analytical expression.x """ - def __init__(self, velocity_Op, idVelocityD=0, method=None): + def __init__(self, velocity_Op, idVelocityD=0): """ Create a Velocity operator. @@ -33,6 +33,9 @@ class Velocity_P(DiscreteOperator): self.name = "velocity" self.gpu_precision = PARMES_REAL_GPU + def setUp(self, method=''): + raise NotImplemented("Not implemented") + def apply(self, t, dt): c_time = 0. if self.numMethod is not None: diff --git a/HySoP/hysop/problem/problem.py b/HySoP/hysop/problem/problem.py index 39926bf869ce508d53ad89a16ad9ea454880e6e6..be071715cacc1857357366434cd8889d0c6817c0 100644 --- a/HySoP/hysop/problem/problem.py +++ b/HySoP/hysop/problem/problem.py @@ -3,11 +3,11 @@ Complete problem description. """ -from parmepy.particular_solvers.basic import ParticleSolver -from parmepy.particular_solvers.gpu import GPUParticleSolver +from abc import ABCMeta, abstractmethod +from parmepy.operator.monitors.monitoring import Monitoring -class Problem(): +class Problem(object): """ Problem representation. @@ -19,76 +19,40 @@ class Problem(): Finally, the problem handle outputs thanks to a io object. """ - def __init__(self, topology, operators, params=None): + __metaclass__ = ABCMeta + + @abstractmethod + def __init__(self, operators, monitors=[]): """ Create a transport problem instance. - @param topology : underlying topology. - \todo make topology transparent to user. @param operators : list of operators. - @param params : unused. """ - ## Problem domains - self.domains = [] - ## Prolem variables - self.variables = [] ## Problem operators self.operators = operators + for m in monitors: + self.operators.append(m) ## Problem topology - self.topology = topology - ## Solver for problem - self.solver = None + self.topology = [] ## Computes time step and manage iterations self.timer = None - ## IO manager - self.io = None ## Timings informations - self.timings_info = ["", ""] - for op in self.operators: - self.addVariable(op.variables) - for v in self.variables: - self.addDomain(v.domain) - - def setTimer(self, t): - """ - Set timer. - @param t : timer. - """ - self.timer = t - - def setIO(self, io): - """ - Set IO. - @param io : io interface. - """ - self.io = io - - def addDiagnostics(self, diagnostics): - """ - Add diagnostics. - @param diagnostics : list of diagnostics related to the problem. - """ - self.diagnostics = diagnostics + self.time_info = ["", ""] - def setSolver(self, t_end, dt, solver_type='gpu', **kwargs): + def setUp(self, t_end, dt): """ Set solver. @param t_end : duration of the simulation. @param dt : simulation time step. - @param solver_type : type of solver to use. Default : gpu solver. - @param **kwargs : dynamic dictionary of arguments specific to solver. """ - if solver_type == 'basic': - self.solver = ParticleSolver(self, t_end, dt, **kwargs) - elif solver_type == 'gpu': - self.solver = GPUParticleSolver(self, t_end, dt, **kwargs) - else: - raise ValueError("Unknown solver type : " + str(solver_type)) - - def initSolver(self): - """Initialize solver.""" - if self.solver is not None: - self.solver.initialize() + self.timer = Timer(t_end, dt) + + def _collect_topologies(self): + for op in self.operators: + for v in op.variables: + for topo in v.topologies: + if topo not in self.topology: + self.topology.append(topo) def solve(self): """ @@ -101,77 +65,28 @@ class Problem(): """ ## Iterations counter ite = 0 - print "\n\n Start solving ..." - if not self.solver.isInitialized: - self.initSolver() - self.io.step() + for op in self.operators: + if isinstance(op, Monitoring): + op.apply(self.timer.t, self.timer.dt, ite) while not self.timer.end: - if (self.topology.rank == 0): + ite = ite + 1 + if (self.topology[0].rank == 0): print "==== Iteration : {0:3d} t={1:6.3f} ====".format( - self.timer.ite + 1, self.timer.t + self.timer.dt) + self.timer.ite, self.timer.t + self.timer.dt) for op in self.operators: - op.apply(self.timer.t, self.timer.dt) + op.apply(self.timer.t, self.timer.dt, ite) self.timer.step() - self.io.step() - [d.process(ite, self.timer.t, self.timer.dt) for d in self.diagnostics] - ite = ite + 1 print "\n\n End solving\n" - self.solver.end() print "=== Timings ===" - if (self.topology.rank == 0): - print "IO total", self.io.compute_time - self.timings_info[0] += "\"IO\" " - self.timings_info[1] += str(self.io.compute_time) + " " + if (self.topology[0].rank == 0): for op in self.operators: op.printComputeTime() - self.timings_info[0] += op.timings_info[0] - self.timings_info[1] += op.timings_info[1] + self.time_info[0] += op.time_info[0] + self.time_info[1] += op.time_info[1] print "\n" print "===\n" - def addVariable(self, cVariable): - """ - Add an continuous variables to the operator. - Also add variables' domains to the operator. - - @param cVariable ContinuousVariable.ContinuousVariable : - variables to add. - """ - try: - for v in cVariable: - if self.variables.count(v) == 0: - self.variables.append(v) - except (TypeError): - if self.variables.count(cVariable) == 0: - self.variables.append(cVariable) - - def addDomain(self, cDomain): - """ - Add an domain to the operator. - - @param cDomain : domain to add - """ - try: - for d in cDomain: - if self.domains.count(d) == 0: - self.domains.append(d) - except (TypeError): - if self.domains.count(cDomain) == 0: - self.domains.append(cDomain) - - def addOperator(self, op, index=None): - """ - Add operator to problem. - - @param op : operator. - @param index : operator index. - """ - if not op in self.operators: - if index is None: - self.operators.append(op) - else: - self.operators.insert(index, op) def __str__(self): """ToString method""" @@ -190,7 +105,48 @@ class Problem(): return s + +class Timer: + """ + Manage time steps and simulation end. + + """ + + def __init__(self, t_end, dt, t_init=0.): + """ + Creates a Timer. + + @param t_end : Simulation final time. + @param dt : Time step. + @param t_init : Simulation starting time. + """ + ## Simulation final time + self.t_end = t_end + ## Simulation time step + self.dt = dt + ## Simulation current time + self.t = t_init + ## Is simulation is terminated + self.end = False + ## Iteration counter + self.ite = 0 + + def step(self): + """ + Update current time. + """ + if self.t < self.t_end: + self.ite += 1 + self.t += self.dt + if self.t >= self.t_end: + self.end = True + + def __str__(self): + s = "Timer (DiscreteOperator). " + return s + if __name__ == "__main__": print __doc__ - print "- Provided class : Problem" + print "- Provided class : problem, Timer" print Problem.__doc__ + print Timer.__doc__ diff --git a/HySoP/hysop/problem/transport.py b/HySoP/hysop/problem/transport.py new file mode 100644 index 0000000000000000000000000000000000000000..4d25501018507045c142f2b3b58cf42f220f5a14 --- /dev/null +++ b/HySoP/hysop/problem/transport.py @@ -0,0 +1,29 @@ +""" +@package parmepy.probem.tranport +""" +from parmepy.problem.problem import Problem + + +class TransportProblem(Problem): + """ + Transport problem description. + """ + def __init__(self, advection, velocity=None, monitors=[]): + if velocity is not None: + Problem.__init__(self, [advection, velocity], monitors) + else: + Problem.__init__(self, [advection], monitors) + self.advection = advection + self.velocity = velocity + + def setUp(self, t_end, dt, + advection_config, + velocity_config=None): + if velocity_config is None and self.velocity is not None: + raise ValueError("A configuration for velocity operator" + + " is required") + Problem.setUp(self, t_end, dt) + self.advection.setUp(**advection_config) + if self.velocity is not None: + self.velocity.setUp(velocity_config) + self._collect_topologies() diff --git a/HySoP/hysop/tools/diagnostics.py b/HySoP/hysop/tools/diagnostics.py deleted file mode 100644 index c716eae9fe6278728d57ea56339a2f3875d2c098..0000000000000000000000000000000000000000 --- a/HySoP/hysop/tools/diagnostics.py +++ /dev/null @@ -1,34 +0,0 @@ -"""@package parmepy.physics.diagnostics - -Classes for diagnostics description. -""" -from abc import ABCMeta, abstractmethod - - -class Diagnostics: - """Abstract description of diagnostics. """ - - __metaclass__ = ABCMeta - - @abstractmethod - def __init__(self, frequence=None, outputPrefix=None): - """ Constructor - @param frequence : output file producing frequency. - @param outputPrefix : file name prefix, contains relative path. - """ - ## Domain dimension. - self.frequence = frequence - self.outputPrefix = outputPrefix - - @abstractmethod - def process(self, time=None, timeStep=None): - """ - @param time : current time. - @param timeStep : time step. - """ - -if __name__ == "__main__": - print __doc__ - print "- Provided class : Diagnostics (abstract)." - print Diagnostics.__doc__ - diff --git a/HySoP/hysop/tools/timer.py b/HySoP/hysop/tools/timer.py deleted file mode 100644 index d3013a3cbafdcc77b69ecdf61fa30542b2025d48..0000000000000000000000000000000000000000 --- a/HySoP/hysop/tools/timer.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -@package parmepy.tools.timer - -Time steps manager. -""" -import time - - -class Timer: - """ - Manage time steps and simulation end. - - """ - - def __init__(self, t_end, dt, t_init=0.): - """ - Creates a Timer. - - @param t_end : Simulation final time. - @param dt : Time step. - @param t_init : Simulation starting time. - """ - ## Simulation final time - self.t_end = t_end - ## Simulation time step - self.dt = dt - ## Simulation current time - self.t = t_init - ## Is simulation is terminated - self.end = False - ## Iteration counter - self.ite = 0 - - def step(self): - """ - Update current time. - """ - if self.t < self.t_end: - self.ite += 1 - self.t += self.dt - if self.t >= self.t_end: - self.end = True - - def __str__(self): - s = "Timer (DiscreteOperator). " - return s - -if __name__ == "__main__": - print __doc__ - print "- Provided class : RemeshingDOp" - print RemeshingDOp.__doc__ diff --git a/HySoP/setup.py.in b/HySoP/setup.py.in index 382cf67474699cfe012d19336bf1348e1aa65c5b..c0de913d7206152db0dcdb7dc4c07278b1aa54fe 100644 --- a/HySoP/setup.py.in +++ b/HySoP/setup.py.in @@ -14,28 +14,22 @@ name = '@PYPACKAGE_NAME@' # List of modules (directories) to be included packages = ['parmepy', 'parmepy.domain', - ## 'parmepy.domain.tests', 'parmepy.fields', - 'parmepy.integrator', - 'parmepy.obstacle', 'parmepy.operator', + 'parmepy.operator.monitors', 'parmepy.problem', - 'parmepy.particular_solvers', - 'parmepy.physics', 'parmepy.tools', - ## 'parmepy.test', - ## 'parmepy.test.test_domain', - ## 'parmepy.test.test_field', - ## 'parmepy.test.test_operator', - ## 'parmepy.test.test_particular_solvers', - ## 'parmepy.solvers' + 'parmepy.numerics', + 'parmepy.numerics.integrators', + 'parmepy.numerics.remeshing', ] -# 'examples'] - if("@USE_MPI@" is "ON"): packages.append('parmepy.mpi') +if("@WITH_GPU@" is "ON"): + packages.append('parmepy.gpu') + # Enable this to get debug info DISTUTILS_DEBUG=1 @@ -65,7 +59,7 @@ if(enable_fortran is "ON"): parmeslib.append('fftw3') parmeslib.append('fftw3_mpi') parmes_libdir.append(fftwdir) - + withscales = '@WITH_SCALES@' if(withscales is "ON"): if(withfftw is "OFF"): @@ -75,7 +69,7 @@ if(enable_fortran is "ON"): options = [('F2PY_REPORT_ON_ARRAY_COPY', '1')] if(os.uname()[0] == 'Linux'): options.append(('F2PY_REPORT_ATEXIT','1')) - + parpyModule = Extension(name='parmepy.f2py', f2py_options=f2py_options, sources=fortran_src, @@ -88,7 +82,7 @@ else: packages.append('parmepy.f2py') packages.append('parmepy.f2py.scales2py') packages.append('parmepy.f2py.fftw2py') - + config = Configuration(name=name, version='@PYPACKAGE_VERSION@', description='Particular Methods implementation in Python', @@ -99,9 +93,9 @@ config = Configuration(name=name, package_dir={'': '@CMAKE_SOURCE_DIR@'}, ext_modules=ext_modules, packages=packages, - data_files=[('./parmepy/particular_solvers/gpu_src', - ['@CMAKE_SOURCE_DIR@/parmepy/particular_solvers/gpu_src/' + cl_file - for cl_file in os.listdir('@CMAKE_SOURCE_DIR@/parmepy/particular_solvers/gpu_src/') + data_files=[('./parmepy/gpu/cl_src', + ['@CMAKE_SOURCE_DIR@/parmepy/gpu/cl_src/' + cl_file + for cl_file in os.listdir('@CMAKE_SOURCE_DIR@/parmepy/gpu/cl_src/') if cl_file[0]!='.' and cl_file[-3:]=='.cl'])] )