diff --git a/CodingRules.org b/CodingRules.org
index 88e0977223bad768ba183d55bb969ecf75e5058e..12a50935dbb90c13177fd6f8a2445cad85322cfb 100644
--- a/CodingRules.org
+++ b/CodingRules.org
@@ -19,5 +19,22 @@ This file provides a list of coding rules for Parmes, parmepy that MUST be appli
 * Python
 ** Naming conventions
 see http://www.python.org/dev/peps/pep-0008/
+*** constants must be uppercased
+
+** abstract class model : domain/domain.py
+
+** use distutils (__init__.py ...)
+   see http://docs.python.org/distutils/setupscript.html
+* Test and examples
+  Three directories :
+** test in Parmes => unit tests
+** benchmarks => performance (where????) included in unit tests???
+** examples => "real physical cases" for users
+Arch :
+/Examples/Name/ : name.py and other utilities
+/Examples/Name/Description : all tex, pdf ... files with a proper description of the problem, the model, the parameters ...
+
+
+
 ** use distutils (__init__.py ...)
 see http://docs.python.org/distutils/setupscript.html
diff --git a/Examples/TaylorGreen3D.py b/Examples/TaylorGreen3D.py
index 0879aaf4523b49bb3184cba425a18cf3450bb82b..18169eb875024a27f4612a2b7dc9eb4ce49830d1 100755
--- a/Examples/TaylorGreen3D.py
+++ b/Examples/TaylorGreen3D.py
@@ -1,6 +1,12 @@
 #!/usr/bin/python
-#import sys
-#sys.path.insert(0,'/scratch/mimeau/install-parmes3/')
+
+"""
+Taylor-Green 3D : see van Rees et al. (2011).
+
+All parameters are set and defined in the python module dataTG.
+
+"""
+
 import parmepy as pp
 from parmepy.f2py import fftw2py
 import numpy as np
@@ -17,7 +23,7 @@ from parmepy.operator.monitors.printer import Printer
 from parmepy.operator.monitors.energy_enstrophy import Energy_enstrophy
 from dataTG import dim, nb, NBGHOSTS, ADVECTION_METHOD, VISCOSITY, \
     WITH_PROJ, OUTPUT_FREQ, FILENAME, simu
-from parmepy.constants import debug, GRADUW, CONSERVATIVE
+from parmepy.constants import CONSERVATIVE

## ----------- A 3d problem -----------
@@ -31,6 +37,7 @@ box = pp.Box(dim, length=[2.0 * pi, 2.0 * pi, 2.0 * pi])

 ## Global resolution
 nbElem = [nb] * dim
+

 ## Function to compute TG velocity
 def computeVel(x, y, z):
     vx = m.sin(x) * m.cos(y) * m.cos(z)
@@ -38,6 +45,7 @@
     vz = 0.
     return vx, vy, vz
+

 ## Function to compute reference vorticity
 def computeVort(x, y, z):
     wx = - m.cos(x) * m.sin(y) * m.sin(z)
@@ -52,6 +60,13 @@
 vorti = Field(domain=box, formula=computeVort,
               name='Vorticity', isVector=True)

 ## Usual Cartesian topology definition
+# At the moment we use two (or three?) topologies :
+# - "topo" for Stretching and all operators based on finite differences.
+#    --> ghost layer = 2
+# - topo from Advection operator for all the other operators.
+#    --> no ghost layer
+# - topo from fftw for Poisson and Diffusion.
+# Todo : check compat between scales and fft operators topologies.
 ghosts = np.ones((box.dimension)) * NBGHOSTS
 topo = Cartesian(box, box.dimension, nbElem,
                  ghosts=ghosts)
@@ -66,7 +81,7 @@ advec = Advection(velo, vorti,
 stretch = Stretching(velo, vorti,
                      resolutions={velo: nbElem,
                                   vorti: nbElem},
-                     formulation=GRADUW,
+                     formulation=CONSERVATIVE,
                      topo=topo
                      )
@@ -93,8 +108,13 @@
 printer = Printer(fields=[velo, vorti],
                   prefix='./res/TG_',
                   ext='.vtk')
+# Bridges between the different topologies in order to
+# redistribute data.
+# 1 - Advection to stretching
 distrAdvStr = Redistribute([vorti, velo], advec, stretch)
+# 2 - Stretching to Poisson/Diffusion
 distrStrPoiss = Redistribute([vorti, velo], stretch, poisson)
+# 3 - Poisson to Energy (maybe useless??)
 distrPoissEnergy = Redistribute([vorti, velo], poisson, energy)

 ## Define the problem to solve
@@ -105,7 +125,8 @@ pb = NSProblem(operators=[advec, distrAdvStr, stretch, distrStrPoiss,

 ## Setting solver to Problem
 pb.setUp()

-print 'all topologies', box.topologies
+for topo in box.topologies.values():
+    print topo

 ## Solve problem
diff --git a/Examples/dataTG.py b/Examples/dataTG.py
index 353cd1441e700d0326be9163f99170e8f5fe105a..4c725456fba89608490a7be85fc445ab384c633b 100644
--- a/Examples/dataTG.py
+++ b/Examples/dataTG.py
@@ -3,7 +3,7 @@
 # problem dimension
 dim = 3
 # resolution
-nb = 129
+nb = 65
 # number of ghosts in usual cartesian topo
 NBGHOSTS = 2
 # Advection method
diff --git a/Examples/howto_integrators.py b/Examples/howto_integrators.py
index b0b453db66d51f10e2eedd790de12686d61fdddd..8118cb3e8ee487ba6842f8d6fa862a915772a835 100644
--- a/Examples/howto_integrators.py
+++ b/Examples/howto_integrators.py
@@ -17,7 +17,6 @@ sin = np.sin
 cos = np.cos
 import matplotlib.pyplot as plt
 from parmepy.constants import MPI
-import copy

 # Grid resolution for tests
 nb = 5
@@ -646,4 +645,4 @@ print "\n====================== 1D ======================\n"
 print "\n====================== 2D ======================\n"
 optim = WITH_GUESS
-test_2D_0(RK4, ode.RK4, optim)
+test_2D_0(RK3, ode.RK3, optim)
diff --git a/HySoP/CMake/ParmesTests.cmake b/HySoP/CMake/ParmesTests.cmake
index 87d3f887437eebc108c418f2d494b5856fb0e04f..bbe0ce7e56697b48c48c2b81c7393c054bb9a6a9 100644
--- a/HySoP/CMake/ParmesTests.cmake
+++ b/HySoP/CMake/ParmesTests.cmake
@@ -7,9 +7,9 @@ set(py_src_dirs
   fields
   domain
   operator
+  numerics
   problem
   tools
-  numerics
   )

 if(USE_MPI)
diff --git a/HySoP/CMakeLists.txt b/HySoP/CMakeLists.txt
index 045fd87e29217fa81600a8d475e512d80bb8dc83..71635c0a1f3440702b9ba1f1aa412f32f1784717 100644
--- a/HySoP/CMakeLists.txt
+++ b/HySoP/CMakeLists.txt
@@ -41,6 +41,7 @@ option(DEBUG "Enable debug mode for Parmes (0:disabled, 1:verbose, 2:trace, 3:ve
 option(FULL_TEST "Enable all test options (pep8 ...) - Default = OFF" OFF)
 option(PROFILE "Enable profiling mode for Parmes. 0:disabled, 1: enabled. Default = 0" 0)
 option(USER_INSTALL "Set install location to python default (equivalent to --user option in python setup). Default = ON" ON)
+option(OPTIM "To allow python -OO run, some packages must be deactivated. Set this option to 'ON' to do so. Default = OFF" OFF)
 if(NOT WITH_LIB_FORTRAN)
   message(WARNING "You deactivate libparmes (fortran) generation. This will disable fftw and scales fonctionnalities.")
   set(WITH_FFTW "OFF")
@@ -250,6 +251,7 @@ if(VERBOSE_MODE)
   message(STATUS " Project will be built in build/${${PROJECT_NAME}_PYTHON_BUILD_DIR}.")
   message(STATUS " Python packages will be installed in ${CMAKE_INSTALL_PREFIX}")
   message(STATUS " ${PROJECT_NAME} debug mode : ${DEBUG}")
+  message(STATUS " Enable -OO run? : ${OPTIM}")
   message(STATUS "====================== ======= ======================")
   message(STATUS " ")
   message(STATUS "Try :")
diff --git a/HySoP/Global_tests/README b/HySoP/Global_tests/README
new file mode 100644
index 0000000000000000000000000000000000000000..20dd663dfeeb6626ab855bf064b2685d87ff9d0a
--- /dev/null
+++ b/HySoP/Global_tests/README
@@ -0,0 +1,7 @@
+This directory contains py files used to test global functionalities (a specific operator, a method ...)
+and to check performance and memory usage.
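+
+Usage note (a sketch, nothing here is enforced by a test driver) : each script
+is meant to be run directly, e.g. "python testPerfAndMemForFD_and_div.py"; most
+scripts pause on raw_input() so that memory can be checked (htop) between steps.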
+
diff --git a/HySoP/Global_tests/testPerfAndMemForFD_and_div.py b/HySoP/Global_tests/testPerfAndMemForFD_and_div.py
new file mode 100644
index 0000000000000000000000000000000000000000..f104f55121ebb061b205dbe2b03fb69d0cd8427f
--- /dev/null
+++ b/HySoP/Global_tests/testPerfAndMemForFD_and_div.py
@@ -0,0 +1,305 @@
+#!/usr/bin/python
+
+"""
+Test memory and performance for finite differences operations.
+
+"""
+
+import parmepy as pp
+import parmepy.tools.numpywrappers as npw
+import math as m
+from parmepy.fields.continuous import Field
+from parmepy.mpi.topology import Cartesian
+from parmepy.operator.stretching import Stretching
+
+
+## ----------- A 3d problem -----------
+print " ========= Start Finite Differences tests ========="
+dim = 3
+pi = m.pi
+sin = m.sin
+cos = m.cos
+## Domain
+box = pp.Box(dim, length=[2.0 * pi, 2.0 * pi, 2.0 * pi])
+
+## Global resolution
+nb = 65
+nbElem = [nb] * dim
+
+
+## Function to compute velocity
+def computeVel(x, y, z):
+    vx = sin(x) * cos(y) * cos(z)
+    vy = - cos(x) * sin(y) * cos(z)
+    vz = 0.
+    return vx, vy, vz
+
+
+## Function to compute vorticity
+def computeVort(x, y, z):
+    wx = - cos(x) * sin(y) * sin(z)
+    wy = - sin(x) * cos(y) * sin(z)
+    wz = 2. * sin(x) * sin(y) * cos(z)
+    return wx, wy, wz
+
+## Fields
+velo = Field(domain=box, formula=computeVel,
+             name='Velocity', isVector=True)
+vorti = Field(domain=box, formula=computeVort,
+              name='VorticityC', isVector=True)
+
+## A topology with ghosts
+NBGHOSTS = 2
+ghosts = npw.ones((box.dimension)) * NBGHOSTS
+topo = Cartesian(box, box.dimension, nbElem,
+                 ghosts=ghosts)
+
+## local coordinates
+coords = topo.mesh.coords
+## Fields discretization
+velo.discretize(topo)
+vorti.discretize(topo)
+velo.initialize()
+vorti.initialize()
+
+## alias to discrete fields components
+w = vorti.discreteFields.values()[0].data
+v = velo.discreteFields.values()[0].data
+
+from parmepy.mpi import MPI
+from parmepy.numerics.finite_differences import FD_C_4
+
+## FD scheme creation and init
+fd_scheme = FD_C_4((topo.mesh.space_step))
+fd_scheme.computeIndices(topo.mesh.iCompute)
+
+import numpy as np
+#### Timings for finite differences schemes
+
+iter = 10
+# Allocation of work vector to store results
+result = npw.zeros_like(w[0])
+
+## Add raw_input to check memory with htop
+print 'press any key to start tests'
+raw_input()
+
+########### Test 1 : finite diff. direct call ###########
+
+#### First method #####
+print 'start ...'
+time = MPI.Wtime()
+for i in xrange(iter):
+    fd_scheme.compute(w[0], 0, result)
+print 'FD compute method time ', MPI.Wtime() - time
+raw_input()
+print 'start ... '
+#### Second method #####
+time = MPI.Wtime()
+for i in xrange(iter):
+    fd_scheme.compute_and_add(w[0], 0, result)
+print 'FD compute_and_add method time ', MPI.Wtime() - time
+
+del result
+iter = 1
+
+#
+# Res : first method is faster and costs less memory
+#
+
+########### Test 2 : FD, with accumulation of the result ###########
+
+result = [npw.zeros_like(w[0]) for i in xrange(2)]
+
+## fd results are accumulated into result[0]
+
+raw_input()
+print 'start'
+#### First method ####
+## Needs at least two workspaces and one is used for the final result.
+time = MPI.Wtime()
+for i in xrange(iter):
+    #for cdir in xrange(3):
+    fd_scheme.compute(w[0], 0, result[0])
+    fd_scheme.compute(w[1], 0, result[1])
+    np.add(result[0], result[1], result[0])
+    fd_scheme.compute(w[2], 0, result[1])
+    np.add(result[0], result[1], result[0])
+print 'Accumulate/FD Compute meth time ', MPI.Wtime() - time
+
+raw_input()
+print 'start ...'
+result2 = npw.zeros_like(w[0])
+#### Second method ####
+## Needs only one workspace, used for the final result.
+time = MPI.Wtime()
+for i in xrange(iter):
+    #for cdir in xrange(3):
+    fd_scheme.compute(w[0], 0, result2)
+    fd_scheme.compute_and_add(w[1], 0, result2)
+    fd_scheme.compute_and_add(w[2], 0, result2)
+print 'Accumulate/FD Compute_and_add meth time ', MPI.Wtime() - time
+
+print 'Computation ok? ', np.allclose(result[0], result2)
+del result
+del result2
+
+#
+# Res : first method is faster and costs less memory
+#
+
+########### Test 3 : 'real' case
+# corresponding more or less to divV computation ###########
+# Needs an intermediate workspace to compute w.v
+raw_input()
+print 'start ...'
+work = [npw.zeros_like(w[0]) for i in xrange(3)]
+raw_input()
+
+## fd results are accumulated into work[0]
+
+#### First method ####
+## Needs at least three workspaces and one is used for the final result.
+time = MPI.Wtime()
+for i in xrange(iter):
+    #for cdir in xrange(3):
+    work[2][...] = v[0] * w[0]
+    fd_scheme.compute(work[2], 0, work[0])
+    work[2][...] = v[0] * w[1]
+    fd_scheme.compute(work[2], 0, work[1])
+    np.add(work[0], work[1], work[0])
+    work[2][...] = v[0] * w[2]
+    fd_scheme.compute(work[2], 0, work[1])
+    np.add(work[0], work[1], work[0])
+print 'Div/FD Compute meth time ', MPI.Wtime() - time
+time = MPI.Wtime()
+for i in xrange(iter):
+    #for cdir in xrange(3):
+    work[2][...] = v[0] * w[0]
+    fd_scheme.compute(work[2], 0, work[0])
+    work[2][...] = v[0] * w[1]
+    fd_scheme.compute(work[2], 0, work[1])
+    work[0][...] += work[1]
+    work[2][...] = v[0] * w[2]
+    fd_scheme.compute(work[2], 0, work[1])
+    work[0][...] += work[1]
+print 'Div/FD Compute meth time (bis) ', MPI.Wtime() - time
+
+#### Second method ####
+## Needs only two workspaces, one used for the final result.
+raw_input()
+print 'start ...'
+work2 = [npw.zeros_like(w[0]) for i in xrange(2)]
+raw_input()
+
+print 'pre id ...', id(work2[1])
+time = MPI.Wtime()
+for i in xrange(iter):
+    #for cdir in xrange(3):
+    work2[1][...] = v[0] * w[0]
+    print 'post id ...', id(work2[1])
+    fd_scheme.compute(work2[1], 0, work2[0])
+    work2[1][...] = v[0] * w[1]
+    fd_scheme.compute_and_add(work2[1], 0, work2[0])
+    work2[1][...] = v[0] * w[2]
+    fd_scheme.compute_and_add(work2[1], 0, work2[0])
+print 'Div/FD Compute_and_add meth time ', MPI.Wtime() - time
+
+print 'Computation ok? ', np.allclose(work[0], work2[0])
+
+del work
+del work2
+#
+# Res : first method is faster and costs less memory
+# (but it's almost the same)
+#
+
+########### Test 4 : 'real' case
+# corresponding more or less to divT computation ###########
+# Needs an intermediate workspace to compute w.v
+
+raw_input()
+print 'start ...'
+work = [npw.zeros_like(w[0]) for i in xrange(5)]
+raw_input()
+
+## fd results are accumulated into work[0:2]
+
+#### First method ####
+## Needs at least five workspaces and three are used for the final result.
+# work[i] = divV(w vi)
+time = MPI.Wtime()
+for i in xrange(iter):
+    # Compute first component of divT ...
+    work[2] = v[0] * w[0]
+    fd_scheme.compute(work[2], 0, work[0])
+    work[2] = v[0] * w[1]
+    fd_scheme.compute(work[2], 0, work[1])
+    np.add(work[0], work[1], work[0])
+    work[2] = v[0] * w[2]
+    fd_scheme.compute(work[2], 0, work[1])
+    np.add(work[0], work[1], work[0])
+    # Second component ... work[0] can not be used anymore
+    work[3] = v[0] * w[0]
+    fd_scheme.compute(work[3], 0, work[1])
+    work[3] = v[0] * w[1]
+    fd_scheme.compute(work[3], 0, work[2])
+    np.add(work[1], work[2], work[1])
+    work[3] = v[0] * w[2]
+    fd_scheme.compute(work[3], 0, work[2])
+    np.add(work[1], work[2], work[1])
+    # Last component ... work[0] and work[1] can not be used anymore
+    work[4] = v[0] * w[0]
+    fd_scheme.compute(work[4], 0, work[2])
+    work[4] = v[0] * w[1]
+    fd_scheme.compute(work[4], 0, work[3])
+    np.add(work[2], work[3], work[2])
+    work[4] = v[0] * w[2]
+    fd_scheme.compute(work[4], 0, work[3])
+    np.add(work[2], work[3], work[2])
+
+print 'DivT/FD Compute meth time ', MPI.Wtime() - time
+#### Second method ####
+## Needs only 4 workspaces, used for the final result.
+raw_input()
+print 'start ...'
+work2 = [npw.zeros_like(w[0]) for i in xrange(4)]
+time = MPI.Wtime()
+for i in xrange(iter):
+    # Compute first component of divT ...
+    work2[3] = v[0] * w[0]
+    fd_scheme.compute(work2[3], 0, work2[0])
+    work2[3] = v[0] * w[1]
+    fd_scheme.compute_and_add(work2[3], 0, work2[0])
+    work2[3] = v[0] * w[2]
+    fd_scheme.compute_and_add(work2[3], 0, work2[0])
+    # Second component ... work2[0] can not be used anymore
+    work2[3] = v[0] * w[0]
+    fd_scheme.compute(work2[3], 0, work2[1])
+    work2[3] = v[0] * w[1]
+    fd_scheme.compute_and_add(work2[3], 0, work2[1])
+    work2[3] = v[0] * w[2]
+    fd_scheme.compute_and_add(work2[3], 0, work2[1])
+    # Last component ... work2[0] and work2[1] can not be used anymore
+    work2[3] = v[0] * w[0]
+    fd_scheme.compute(work2[3], 0, work2[2])
+    work2[3] = v[0] * w[1]
+    fd_scheme.compute_and_add(work2[3], 0, work2[2])
+    work2[3] = v[0] * w[2]
+    fd_scheme.compute_and_add(work2[3], 0, work2[2])
+
+print 'DivT/FD Compute_and_add meth time ', MPI.Wtime() - time
+
+ind = topo.mesh.iCompute
+for i in xrange(3):
+    print 'Computation ok? ', np.allclose(work[i][ind], work2[i][ind])
+#
+# Res : first method is faster and costs less memory
+# (but it's almost the same)
+#
+
+
+## Conclusion (if res > 200 ** 3) -> it seems that the first method, although
+# it needs more initial workspaces, is always faster and cheaper in memory
+# (because of the hidden tmp allocations of the second one)
+# BUT for smaller resolutions, the second method may be more efficient ...
diff --git a/HySoP/hysop/__init__.py.in b/HySoP/hysop/__init__.py.in
index 7be56d3b0dfabfa1eaf9dc951fca2aaf7f45b035..f680e04c9d5e08c39b522340178e6727917cf288 100755
--- a/HySoP/hysop/__init__.py.in
+++ b/HySoP/hysop/__init__.py.in
@@ -16,7 +16,7 @@ __SCALES_ENABLED__ = "@WITH_SCALES@" is "ON"
 __VERBOSE__ = "@DEBUG@" in ["1", "3"]
 __DEBUG__ = "@DEBUG@" in ["2", "3"]
 __PROFILE__= "@PROFILE@" in ["0", "1"]
-
+__OPTIMIZE__ = "@OPTIM@" == "ON"
 # MPI
 if __MPI_ENABLED__:
     #import parmepy.mpi as mpi
diff --git a/HySoP/hysop/constants.py b/HySoP/hysop/constants.py
index 6c918744699f2b2a5e093cec5d531d8f3b363a35..3bd0f80c5b18556dd01acce404ab786aa3e7295b 100644
--- a/HySoP/hysop/constants.py
+++ b/HySoP/hysop/constants.py
@@ -3,10 +3,18 @@
 Constant parameters required for the parmepy package (internal use).
""" -from parmepy import __DEBUG__, __PROFILE__ +from parmepy import __DEBUG__, __PROFILE__, __OPTIMIZE__ import numpy as np import math from parmepy.mpi import MPI +# Utilities for serialization +if __OPTIMIZE__: + import cPickle + parmesPickle = cPickle +else: + from scitools.NumPyDB import NumPyDB_cPickle + parmesPickle = NumPyDB_cPickle + PI = math.pi # Set default type for real and integer numbers diff --git a/HySoP/hysop/dataloader.py b/HySoP/hysop/dataloader.py deleted file mode 100644 index b19591b10303071aab86ea6cff315ad94603f279..0000000000000000000000000000000000000000 --- a/HySoP/hysop/dataloader.py +++ /dev/null @@ -1,249 +0,0 @@ -""" -@file dataloader.py - -Program which take a input data file, and make -the variable -""" -#import parmepy.constants import debug -from parmepy.constants import np - - -class DataLoader(object): - """ - Take a data file which containt all variables of the simulation. - and he test and load the variable into the dataloader class. - """ - -# @debug - def __new__(cls, *args, **kw): - return object.__new__(cls, *args, **kw) - -# @debug - def __init__(self, filename='input.dat'): - """ - the initialization will be ignore the line with '#' caracter - - @param filename: name of data file. - """ - self.filename = filename - self.varlist = [] - filestream = open(filename, 'r') - print '=============Load==============' - count = 0 - for line in filestream: - if line[0] == '#' or line.find('=') < 1: - pass - else: - decoup = line.split('=', 1) - loadok = self.loadvar(decoup) - if loadok: - count = count + 1 - filestream.close() - self.numvarload = count - - def loadvar(self, descriptvar): - if str(descriptvar[0]).find('Origin X') >=0: - self.Ox = np.float64(descriptvar[1]) - self.varlist.append('Ox') - return True - if descriptvar[0].find('Origin Y') >= 0: - self.Oy = np.float64(descriptvar[1]) - self.varlist.append('Oy') - return True - if descriptvar[0].find('Origin Z') >= 0: - self.Oz = np.float64(descriptvar[1]) - self.varlist.append('Oz') - return True - if descriptvar[0].find('Lenght X') >= 0: - self.Lx = np.float64(descriptvar[1]) - self.varlist.append('Lx') - return True - if descriptvar[0].find('Lenght Y') >= 0: - self.Ly = np.float64(descriptvar[1]) - self.varlist.append('Ly') - return True - if descriptvar[0].find('Lenght Z') >= 0: - self.Lz = np.float64(descriptvar[1]) - self.varlist.append('Lz') - return True - if descriptvar[0].find('Resolution X') >= 0: - self.resX = np.float64(descriptvar[1]) - self.varlist.append('resX') - return True - if descriptvar[0].find('Resolution Y') >= 0: - self.resY = np.float64(descriptvar[1]) - self.varlist.append('resY') - return True - if descriptvar[0].find('Resolution Z') >= 0: - self.resZ = np.float64(descriptvar[1]) - self.varlist.append('resZ') - return True - if descriptvar[0].find('Time Step') >= 0: - self.timeStep = np.float64(descriptvar[1]) - self.varlist.append('timeStep') - return True - if descriptvar[0].find('Final Time') >= 0: - self.finalTime = np.float64(descriptvar[1]) - self.varlist.append('finalTime') - return True - if descriptvar[0].find('Restart') >= 0: - self.restart = np.uint32(descriptvar[1]) - self.varlist.append('restart') - return True - if descriptvar[0].find('Frequency of Saving') >= 0: - self.dumpfreq = np.uint32(descriptvar[1]) - self.varlist.append('dumpfreq') - return True - if descriptvar[0].find('File to restart') >= 0: - self.dumpfile = descriptvar[1] - self.varlist.append('dumpfile') - return True - if descriptvar[0].find('Frequency of Visualisation') >= 0: - 
self.outModuloPrinter = np.uint32(descriptvar[1]) - self.varlist.append('outModuloPrinter') - return True - if descriptvar[0].find('Output file for visualisation') >= 0: - self.outputPrinter = ''.join(descriptvar[1].rstrip('\n').split()) - self.varlist.append('outputPrinter') - return True - if descriptvar[0].find('Frequency of Forces output') >= 0: - self.outModuloForces = np.uint32(descriptvar[1]) - self.varlist.append('outModuloForces') - return True - if descriptvar[0].find('Output file for Forces') >= 0: - self.outputForces = ''.join(descriptvar[1].rstrip('\n').split()) - self.varlist.append('outputForces') - return True - if descriptvar[0].find('Frequency of Energy output') >= 0: - self.outModuloEnergies = np.float64(descriptvar[1]) - self.varlist.append('outModuloEnergies') - return True - if descriptvar[0].find('Output file for Energy') >= 0: - self.outputEnergies = ''.join(descriptvar[1].rstrip('\n').split()) - self.varlist.append('outputEnergies') - return True - if descriptvar[0].find('Upstream velocity') >= 0: - self.uinf = np.float64(descriptvar[1]) - self.varlist.append('uinf') - return True - if descriptvar[0].find('Density') >= 0: - self.rho = np.float64(descriptvar[1]) - self.varlist.append('rho') - return True - if descriptvar[0].find('Viscosity') >= 0: - self.visco = np.float64(descriptvar[1]) - self.varlist.append('visco') - return True - if descriptvar[0].find('Dens2') >= 0: - self.rho2 = np.float64(descriptvar[1]) - self.varlist.append('rho2') - return True - if descriptvar[0].find('Visco2') >= 0: - self.visco2 = np.float64(descriptvar[1]) - self.varlist.append('visco2') - return True - if descriptvar[0].find('Advection Method') >= 0: - self.methodAdv = descriptvar[1].rstrip('\n') - self.varlist.append('methodAdv') - return True - if descriptvar[0].find('Scalar Advec Method') >= 0: - self.methodAdvSc = descriptvar[1].rstrip('\n') - self.varlist.append('methodAdvSc') - return True - if descriptvar[0].find('Stretching Method') >= 0: - self.methodStretch = descriptvar[1].rstrip('\n') - self.varlist.append('methodStretch') - return True - if descriptvar[0].find('Stretching Property') >= 0: - self.propStretch = descriptvar[1].rstrip('\n') - self.varlist.append('propStretch') - return True - if descriptvar[0].find('Baro Method') >= 0: - self.methodBaro = descriptvar[1].rstrip('\n') - self.varlist.append('methodBaro') - return True - if descriptvar[0].find('Penalisation Method') >= 0: - self.methodPenal = descriptvar[1].rstrip('\n') - self.varlist.append('methodPenal') - return True - if descriptvar[0].find('Forces Method') >= 0: - self.methodForces = descriptvar[1].rstrip('\n') - self.varlist.append('methodForces') - return True - if descriptvar[0].find('Vorticity projection') >= 0: - self.vortProj = np.uint32(descriptvar[1]) - self.varlist.append('vortProj') - return True - if descriptvar[0].find('Multi resolution') >= 0: - self.multires = np.uint32(descriptvar[1]) - self.varlist.append('multires') - return True - if descriptvar[0].find('Resolution Filter X') >= 0: - self.resFilterX = np.uint32(descriptvar[1]) - self.varlist.append('resFilterX') - return True - if descriptvar[0].find('Resolution Filter Y') >= 0: - self.resFilterY = np.uint32(descriptvar[1]) - self.varlist.append('resFilterY') - return True - if descriptvar[0].find('Resolution Filter Z') >= 0: - self.resFilterZ = np.uint32(descriptvar[1]) - self.varlist.append('resFilterZ') - return True - if descriptvar[0].find('Name of Obstacle') >= 0: - self.obstName = descriptvar[1].rstrip('\n') - 
self.varlist.append('obstName')
-            return True
-        if descriptvar[0].find('Lambda Fluid') >= 0:
-            self.lambdFluid = np.float64(descriptvar[1])
-            self.varlist.append('lambdFluid')
-            return True
-        if descriptvar[0].find('Lambda Porous') >= 0:
-            self.lambdPorous = np.float64(descriptvar[1])
-            self.varlist.append('lambdPorous')
-            return True
-        if descriptvar[0].find('Lambda Solid') >= 0:
-            self.lambdSolid = np.float64(descriptvar[1])
-            self.varlist.append('lambdSolid')
-            return True
-        if descriptvar[0].find('Obstacle Radius') >= 0:
-            self.obstRadius = np.float64(descriptvar[1])
-            self.varlist.append('obstRadius')
-            return True
-        if descriptvar[0].find('Obstacle origin in X') >= 0:
-            self.obstOx = np.float64(descriptvar[1])
-            self.varlist.append('obstOx')
-            return True
-        if descriptvar[0].find('Obstacle origin in Y') >= 0:
-            self.obstOy = np.float64(descriptvar[1])
-            self.varlist.append('obstOy')
-            return True
-        if descriptvar[0].find('Obstacle origin in Z') >= 0:
-            self.obstOz = np.float64(descriptvar[1])
-            self.varlist.append('obstOz')
-            return True
-        if descriptvar[0].find('Thickness of Porous layer') >= 0:
-            self.thicknPorous = np.float64(descriptvar[1])
-            self.varlist.append('thicknPorous')
-            return True
-        if descriptvar[0].find('Size of Bound in Z direction') >= 0:
-            self.thicknZ = np.float64(descriptvar[1])
-            self.varlist.append('thicknZ')
-            return True
-        print 'The variable ' + str(descriptvar[0]) + 'is unknown.' +\
-            'It\'s not load'
-        return False
-
-    def __str__(self):
-        """
-        Common printings for operators
-        """
-        s = str(self.filename) + " has " + str(self.numvarload) +\
-            " variables loaded, list:" +\
-            ','.join(str(x) for x in self.varlist)
-        return s + "\n"
-
-if __name__ == "__main__":
-    print __doc__
-    print "- Provided class : problem"
-    print Problem.__doc__
diff --git a/HySoP/hysop/f2py/fftw2py.f90 b/HySoP/hysop/f2py/fftw2py.f90
index 42764aca1e0a1ca93aa53bdb5162766618583c6c..abfa43c232bea0fcf9a6bd2faf1c5672decf9a2f 100755
--- a/HySoP/hysop/f2py/fftw2py.f90
+++ b/HySoP/hysop/f2py/fftw2py.f90
@@ -82,7 +82,7 @@ contains
     call filter_poisson_2d()

     call c2r_2d(velocity_x,velocity_y)
-    print *, "fortran resolution time : ", MPI_WTime() - start
+    !!print *, "fortran resolution time : ", MPI_WTime() - start

   end subroutine solve_poisson_2d
diff --git a/HySoP/hysop/f2py/scales2py.f90 b/HySoP/hysop/f2py/scales2py.f90
index f8a09d52a63f7416b5301261c97a33d2b8c7b641..6ea913f0af8d2cd1b640313012c915fd54cd4d41 100755
--- a/HySoP/hysop/f2py/scales2py.f90
+++ b/HySoP/hysop/f2py/scales2py.f90
@@ -72,7 +72,7 @@ contains
     t0 = MPI_Wtime()
     call advec_step(dt,vx,vy,vz,scal)
-    print *, "inside ...", cart_rank, ":", MPI_Wtime()-t0
+    !!print *, "inside ...", cart_rank, ":", MPI_Wtime()-t0

   end subroutine solve_advection
 end module scales2py
diff --git a/HySoP/hysop/fields/discrete.py b/HySoP/hysop/fields/discrete.py
index 63c76c5dcc1f7d82726b0173b8813d5b7a1bc96e..d169029914fbde3995028f9d79f5291e80c0cb06 100644
--- a/HySoP/hysop/fields/discrete.py
+++ b/HySoP/hysop/fields/discrete.py
@@ -3,8 +3,7 @@
 Discrete fields (scalars or vectors) descriptions.
""" -from parmepy.constants import debug -from scitools.NumPyDB import NumPyDB_cPickle +from parmepy.constants import debug, parmesPickle from itertools import count from parmepy.mpi import main_rank from parmepy.tools.timers import Timer, timed_function @@ -194,10 +193,10 @@ class DiscreteField(object): filename += str(main_rank) # create a new db if mode is None: - db = NumPyDB_cPickle(filename, mode='store') + db = parmesPickle(filename, mode='store') elif mode is 'append': # use an existing db - db = NumPyDB_cPickle(filename, mode='load') + db = parmesPickle(filename, mode='load') #for dim in xrange(self.dimension): # idd = self.name + '_' + str(dim) @@ -215,7 +214,7 @@ class DiscreteField(object): fieldname = self.name filename += '_rk_' filename += str(main_rank) - db = NumPyDB_cPickle(filename, mode='load') + db = cPickle(filename, mode='load') for dim in xrange(self.nbComponents): self.data[dim] = db.load(fieldname)[0][dim] diff --git a/HySoP/hysop/numerics/__init__.py b/HySoP/hysop/numerics/__init__.py index cc4d89e6e8ab27bde2044447cba91e555d3cd557..eec80ccf6aa0f446052af7ca5003db946437d80c 100644 --- a/HySoP/hysop/numerics/__init__.py +++ b/HySoP/hysop/numerics/__init__.py @@ -4,3 +4,14 @@ # # All functions in this package are supposed to work with numpy arrays, # not with parmepy fields. +# +# +# +# +# +# \section perfandmem About performances and optimisations +# +# \subsection Finite Differences +# Two ways : +# - fd.compute +# - fd.compute_and_add diff --git a/HySoP/hysop/numerics/differential_operations.py b/HySoP/hysop/numerics/differential_operations.py index 0262b549af8d34f6fc5299e68788b553455eb248..3d09a110f5da893bfe6f1771fb9ef20539f3cf57 100755 --- a/HySoP/hysop/numerics/differential_operations.py +++ b/HySoP/hysop/numerics/differential_operations.py @@ -5,6 +5,7 @@ Library of functions used to perform classical vector calculus (diff operations like grad, curl ...) """ from parmepy.constants import debug, XDIR, YDIR, ZDIR +import parmepy.tools.numpywrappers as npw from abc import ABCMeta, abstractmethod from finite_differences import FD_C_4 import numpy as np @@ -61,27 +62,27 @@ class Curl(DifferentialOperation): #data[2][...] = 0.0 #-- d/dy vz -- in data[XDIR] - self.fd_scheme.inplace(variable[ZDIR], YDIR, data[XDIR]) + self.fd_scheme.compute(variable[ZDIR], YDIR, data[XDIR]) # -- -d/dz vy -- in data[YDIR] work = data[YDIR] - self.fd_scheme.inplace(variable[YDIR], ZDIR, work) + self.fd_scheme.compute(variable[YDIR], ZDIR, work) # data_x = d/dy vz - d/dz vy data[XDIR][self.indices] -= work[self.indices] #-- d/dz vx -- in data[YDIR] - self.fd_scheme.inplace(variable[XDIR], ZDIR, data[YDIR]) + self.fd_scheme.compute(variable[XDIR], ZDIR, data[YDIR]) # -- -d/dx vz -- in data[ZDIR] work = data[ZDIR] - self.fd_scheme.inplace(variable[ZDIR], XDIR, work) + self.fd_scheme.compute(variable[ZDIR], XDIR, work) # data_y = d/dz vx - d/dx vz data[YDIR][self.indices] -= work[self.indices] # - d/dy vx in data[ZDIR] - self.fd_scheme.inplace(variable[XDIR], YDIR, data[ZDIR]) + self.fd_scheme.compute(variable[XDIR], YDIR, data[ZDIR]) data[ZDIR][self.indices] *= -1. # add d/dx vy, add to data[ZDIR] - self.fd_scheme.accumulate(variable[YDIR], XDIR, data[ZDIR]) + self.fd_scheme.compute_and_add(variable[YDIR], XDIR, data[ZDIR]) return data @@ -90,98 +91,371 @@ class DivV(DifferentialOperation): """ Computes \f$ \nabla.(\rho V) \f$, \f$ \rho\f$ a scalar field and V a vector field. + Works for any dimension. 
diff --git a/HySoP/hysop/numerics/differential_operations.py b/HySoP/hysop/numerics/differential_operations.py
index 0262b549af8d34f6fc5299e68788b553455eb248..3d09a110f5da893bfe6f1771fb9ef20539f3cf57 100755
--- a/HySoP/hysop/numerics/differential_operations.py
+++ b/HySoP/hysop/numerics/differential_operations.py
@@ -5,6 +5,7 @@ Library of functions used to perform classical vector calculus
 (diff operations like grad, curl ...)
 """
 from parmepy.constants import debug, XDIR, YDIR, ZDIR
+import parmepy.tools.numpywrappers as npw
 from abc import ABCMeta, abstractmethod
 from finite_differences import FD_C_4
 import numpy as np
@@ -61,27 +62,27 @@ class Curl(DifferentialOperation):
         #data[2][...] = 0.0

         #-- d/dy vz -- in data[XDIR]
-        self.fd_scheme.inplace(variable[ZDIR], YDIR, data[XDIR])
+        self.fd_scheme.compute(variable[ZDIR], YDIR, data[XDIR])
         # -- -d/dz vy -- in data[YDIR]
         work = data[YDIR]
-        self.fd_scheme.inplace(variable[YDIR], ZDIR, work)
+        self.fd_scheme.compute(variable[YDIR], ZDIR, work)
         # data_x = d/dy vz - d/dz vy
         data[XDIR][self.indices] -= work[self.indices]

         #-- d/dz vx -- in data[YDIR]
-        self.fd_scheme.inplace(variable[XDIR], ZDIR, data[YDIR])
+        self.fd_scheme.compute(variable[XDIR], ZDIR, data[YDIR])
         # -- -d/dx vz -- in data[ZDIR]
         work = data[ZDIR]
-        self.fd_scheme.inplace(variable[ZDIR], XDIR, work)
+        self.fd_scheme.compute(variable[ZDIR], XDIR, work)
         # data_y = d/dz vx - d/dx vz
         data[YDIR][self.indices] -= work[self.indices]

         # - d/dy vx in data[ZDIR]
-        self.fd_scheme.inplace(variable[XDIR], YDIR, data[ZDIR])
+        self.fd_scheme.compute(variable[XDIR], YDIR, data[ZDIR])
         data[ZDIR][self.indices] *= -1.
         # add d/dx vy, add to data[ZDIR]
-        self.fd_scheme.accumulate(variable[YDIR], XDIR, data[ZDIR])
+        self.fd_scheme.compute_and_add(variable[YDIR], XDIR, data[ZDIR])

         return data
@@ -90,98 +91,371 @@ class DivV(DifferentialOperation):
     """
     Computes \f$ \nabla.(\rho V) \f$, \f$ \rho\f$ a scalar field and
     V a vector field.
+    Works for any dimension.
+
+    Methods :
+    1 - FD_C_4 (default) : 4th order, centered finite differences, with
+    local workspace and based on fd.compute.
+    init : func = DivV(topo, memshape=shape)
+    call : result = func(var1, scal, result)
+    2 - FD_C_4_work : same as above but user must provide a work space
+    during call (see DivV.FDCentral4_with_work)
+    init : func = DivV(topo, method='FD_C_4_work')
+    call : result = func(var1, scal, result, work)
+    3 - FD_C_4_CAA : 4th order, centered finite differences, with
+    local workspace and based on fd.compute_and_add.
+    init : func = DivV(topo, method='FD_C_4_CAA', memshape=shape)
+    call : result = func(var1, scal, result)
+    4 - FD_C_4_CAA_work : same as above but user must provide a work
+    space during call (see DivV.FDCentral4_CAA_with_work)
+    init : func = DivV(topo, method='FD_C_4_CAA_work')
+    call : result = func(var1, scal, result, work)
+
+    shape is a tuple (like np.ndarray.shape), e.g. (nx,ny,nz).
+
+    Note FP:
+    - methods 1/2 are always faster than 3/4 for large systems (> 200 ** 3)
+    and consume less memory. For smaller systems, well, it depends ...
+    The with_work options are worthwhile if you already have some temporary
+    fields available, keeping in mind that they will be overwritten during
+    the DivV call.
+    - if var1 is 1D, the best choice is FD_C_4_CAA (with or without work)
     """
-    def __init__(self, topo, method=None):
-        if method is not None and method.find('FD_order2') >= 0:
-            raise ValueError("2nd order scheme Not yet implemented")
-        elif method.find('FD_C4_nowork') >= 0:
-            self.fcall = self.FDCentral4
+    def __init__(self, topo, method='FD_C_4', memshape=None):
+
+        if method.find('FD_order2') >= 0:
+            raise ValueError("2nd order scheme Not yet implemented")
+
+        elif method.find('FD_C_4_work') >= 0:
+            # - 4th order FD,
+            # - work space provided by user during function call,
+            #   i.e. memshape not required,
+            # - fd.compute method
+
+            # declare and create fd scheme
             self.fd_scheme = FD_C_4((topo.mesh.space_step))
-        else:
-            self.fcall = self.FDCentral4Work
+            # connect to fd function call
+            self.fcall = self.FDCentral4_with_work
+
+        elif method.find('FD_C_4_CAA_work') >= 0:
+            # - 4th order FD,
+            # - work space provided by user during function call,
+            #   i.e. memshape not required,
+            # - fd.compute_and_add method
+
+            # declare and create fd scheme
+            self.fd_scheme = FD_C_4((topo.mesh.space_step))
+            # connect to fd function call
+            self.fcall = self.FDCentral4_CAA_with_work
+
+        elif method.find('FD_C_4_CAA') >= 0:
+            # - 4th order FD,
+            # - local work space => memshape required,
+            # - fd.compute_and_add method
+            if memshape is None:
+                raise ValueError("memshape required for this method")
+
+            # Max nb of components of input fields.
+            self.nbComponents = len(memshape)
+            self._WORKLENGTH = 1
+            # declare and create fd scheme
             self.fd_scheme = FD_C_4((topo.mesh.space_step))
+            # connect to finite differences function ...
+            self.fcall = self.FDCentral4_CAA
+            # allocate local workspace
+            self._work = [npw.zeros(memshape)
+                          for i in xrange(self._WORKLENGTH)]

-        self.indices = topo.mesh.iCompute
-        self.fd_scheme.computeIndices(self.indices)
+        else:
+            # - 4th order FD,
+            # - local work space => memshape required,
+            # - fd.compute method
+            if memshape is None:
+                raise ValueError("memshape required for this method")
+
+            # Max nb of components of input fields.
+            self.nbComponents = len(memshape)
+            if self.nbComponents == 1:  # 1D case
+                self._WORKLENGTH = 1
+            else:  # 2 and 3D cases
+                self._WORKLENGTH = 2
+            # declare and create fd scheme
+            self.fd_scheme = FD_C_4((topo.mesh.space_step))
+            # connect to finite differences function ...
+            self.fcall = self.FDCentral4
+            # allocate local workspace
+            self._work = [npw.zeros(memshape)
+                          for i in xrange(self._WORKLENGTH)]
+
+        self._indices = topo.mesh.iCompute
+        # initialize fd scheme
+        self.fd_scheme.computeIndices(self._indices)
+        # check ghosts ... must be updated when
+        # other fd schemes will be implemented.
         assert (topo.ghosts >= 2).all(),\
             'you need a ghost layer for FD4 scheme.'
-        self.nbComponents = topo.domain.dimension

-    def __call__(self, var1, scal, data=None, work=None):
-        return self.fcall(var1, scal, data)
+    def __call__(self, var1, scal, result, work=None):
+        return self.fcall(var1, scal, result, work)

-    def FDCentral4Work(self, var1, scal, data=None, work=None):
+    def FDCentral4(self, var1, scal, result, work=None):
         """
         Compute central finite difference scheme, order 4.
+        No work vector provided by user --> self._work will be
+        used. It must be created at init and thus memshape is required.
         """
-        if data is None:
-            data = np.zeros_like(var1[0])
-        if work is None:
-            work = np.zeros_like(var1[0])
-
-        #data[...] = 0.0
-
-        # d/dx (scal * v1x), saved in data
-        self.fd_scheme.inplace(scal * var1[XDIR], XDIR, data)
-        # d/dy (scal * v1y), saved in work
-        self.fd_scheme.inplace(scal * var1[YDIR], YDIR, work)
-        data[self.indices] += work[self.indices]
-        # d/dz(scal * v1z), saved in work
-        self.fd_scheme.inplace(scal * var1[ZDIR], ZDIR, work)
-        data[self.indices] += work[self.indices]
-        return data
-
-    def FDCentral4(self, var1, scal, data=None):
+        assert scal is not result
+        for i in xrange(len(var1)):
+            assert var1[i] is not result
+
+        # _work[0:1] are used as temporary space
+        # for computation.
+        # div computations are accumulated into result.
+        # result does not need initialisation to zero.
+
+        # d/dx (scal * var1x), saved into result
+        self._work[0][...] = scal * var1[XDIR]
+        self.fd_scheme.compute(self._work[0], XDIR, result)
+        # other components (if any) : products saved into _work[0],
+        # derivatives into _work[1], then added into result
+        # d/dy (scal * var1y), saved in work and added into result
+        # d/dz(scal * var1z), saved in work and added into result (if 3D)
+        for cdir in xrange(1, len(var1)):
+            self._work[0][...] = scal * var1[cdir]
+            self.fd_scheme.compute(self._work[0], cdir, self._work[1])
+            result[self._indices] += self._work[1][self._indices]
+
+        return result
+
+    def FDCentral4_with_work(self, var1, scal, result, work):
         """
+        Compute central finite difference scheme, order 4.
+        Work vector provided by user.
+        Warning : work content will be overwritten.
         """
-        if data is None:
-            data = np.zeros_like(var1[0])
-        else:
-            data[...] = 0.0
-
-        # d/dx (scal * v1x)
-        self.fd_scheme.accumulate(var1[XDIR] * scal, XDIR, data)
-        # d/dy (scal * v1y)
-        self.fd_scheme.accumulate(var1[YDIR] * scal, YDIR, data)
-        # d/dz(scal * v1z)
-        self.fd_scheme.accumulate(var1[ZDIR] * scal, ZDIR, data)
-
-        return data
+        assert scal is not result
+        assert len(work) >= 2
+        # indeed 1 is enough for the 1D case and 2 for 2D and 3D.
+        # I set 2 by default to avoid an "if" call.
+        # Anyway, if var1 is 1D, the best solution is to use
+        # FDCentral4_CAA_with_work.
+
+        for i in xrange(len(var1)):
+            assert var1[i] is not result
+            if i < len(work):
+                assert work[i].shape == var1[i].shape
+
+        # work[0:1] are used as temporary space
+        # for computation.
+        # div computations are accumulated into result.
+        # result does not need initialisation to zero.
+
+        # d/dx (scal * var1x), saved into result
+        work[0][...] = scal * var1[XDIR]
+        self.fd_scheme.compute(work[0], XDIR, result)
+        # other components (if any) : products saved into work[0],
+        # derivatives into work[1], then added into result
+        # d/dy (scal * var1y), saved in work and added into result
+        # d/dz(scal * var1z), saved in work and added into result (if 3D)
+        for cdir in xrange(1, len(var1)):
+            work[0][...] = scal * var1[cdir]
+            self.fd_scheme.compute(work[0], cdir, work[1])
+            result[self._indices] += work[1][self._indices]
+
+        return result
+
+    def FDCentral4_CAA(self, var1, scal, result, work=None):
+        """
+        Compute central finite difference scheme, order 4.
+        Use fd_scheme.compute_and_add.
+        No work space as input (use self._work).
+        It must be created at init and thus memshape is required.
+        CAA stands for compute_and_add.
+        """
+        assert scal is not result
+        for i in xrange(len(var1)):
+            assert var1[i] is not result
+
+        # _work[0] is used as temporary space
+        # for computation.
+        # div computations are accumulated into result.
+        # result does not need initialisation to zero.
+
+        # d/dx (scal * var1x), saved into result
+        self._work[0][...] = scal * var1[XDIR]
+        self.fd_scheme.compute(self._work[0], XDIR, result)
+        # other components (if any) saved into _work[0] and added into result
+        # d/dy (scal * var1y), saved in work and added into result
+        # d/dz(scal * var1z), saved in work and added into result (if 3D)
+        for cdir in xrange(1, len(var1)):
+            self._work[0][...] = scal * var1[cdir]
+            self.fd_scheme.compute_and_add(self._work[0], cdir, result)
+
+        return result
+
+    def FDCentral4_CAA_with_work(self, var1, scal, result, work):
+        """
+        Compute central finite difference scheme, order 4.
+        Use fd_scheme.compute_and_add.
+        Work space provided by user (work[0] is overwritten).
+        """
+        assert scal is not result
+        assert len(work) >= 1
+        for i in xrange(len(var1)):
+            assert var1[i] is not result
+        assert work[0].shape == var1[0].shape
+
+        # work[0] is used as temporary space
+        # for computation.
+        # div computations are accumulated into result.
+        # result does not need initialisation to zero.
+
+        # d/dx (scal * var1x), saved into result
+        work[0][...] = scal * var1[XDIR]
+        self.fd_scheme.compute(work[0], XDIR, result)
+        # other components (if any) saved into work[0] and added into result
+        # d/dy (scal * var1y), saved in work and added into result
+        # d/dz(scal * var1z), saved in work and added into result (if 3D)
+        for cdir in xrange(1, len(var1)):
+            work[0][...] = scal * var1[cdir]
+            self.fd_scheme.compute_and_add(work[0], cdir, result)
+
+        return result
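+
+# A minimal usage sketch for DivV ('topo', 'velo' -- a list of numpy arrays,
+# one per component -- and 'scal' are assumptions, not defined in this file) :
+#
+#    div = DivV(topo, memshape=scal.shape)     # default method, FD_C_4
+#    result = npw.zeros(scal.shape)
+#    result = div(velo, scal, result)          # result = div(scal * velo)
+#
+#    # same computation, with a user-provided list of (at least 2) arrays :
+#    divw = DivV(topo, method='FD_C_4_work')
+#    result = divw(velo, scal, result, work)
+
+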
 class DivT(DifferentialOperation):
     """
-    Computes \f$ \nabla.(wx.V,wy.V,wz.V) \f$, \f$w\f$ and V some vector fields.
+    Computes \f$ \nabla.(W.Vx, W.Vy, W.Vz) \f$,
+    \f$W\f$ and \f$V\f$ some vector fields.
+
+    Methods :
+    1 - FD_C_4 (default) : 4th order, centered finite differences, with
+    local workspace and based on fd.compute.
+    init : func = DivT(topo, memshape=shape)
+    call : result = func(var1, var2, result)
+    2 - FD_C_4_work : same as above but user must provide a work space
+    during call (see DivT.FDCentral4_with_work)
+    init : func = DivT(topo, method='FD_C_4_work')
+    call : result = func(var1, var2, result, work)
+    3 - FD_C_4_CAA : 4th order, centered finite differences, with
+    local workspace and based on fd.compute_and_add.
+    init : func = DivT(topo, method='FD_C_4_CAA', memshape=shape)
+    call : result = func(var1, var2, result)
+    4 - FD_C_4_CAA_work : same as above but user must provide a work
+    space during call (see DivT.FDCentral4_with_work)
+    init : func = DivT(topo, method='FD_C_4_CAA_work')
+    call : result = func(var1, var2, result, work)
+
+    var1 stands for W and var2 for V.
+    result must be a vector field (same number of components as var1 and
+    var2), each component with the same size as var1[i].
+    work must be a list of two arrays (only one for the CAA cases),
+    each with the same size as var1[i].
     """
-    def __init__(self, topo, method=None):
+    def __init__(self, topo, method='FD_C_4', memshape=None):
         if method is not None and method.find('FD_order2') >= 0:
             raise ValueError("2nd order scheme Not yet implemented")
+        elif method.find('FD_C_4_work') >= 0:
+            # - 4th order FD,
+            # - work space provided by user during function call,
+            #   memshape not required
+            # - fd.compute method
+
+            # connect call function
+            self.fcall = self.FDCentral4_with_work
+            # set div(scal.var1) operator
+            self.div = DivV(topo, method='FD_C_4_work')
+
+        elif method.find('FD_C_4_CAA_work') >= 0:
+            # - 4th order FD,
+            # - work space provided by user during function call,
+            #   memshape not required
+            # - fd.compute_and_add method
+
+            # connect call function
+            self.fcall = self.FDCentral4_with_work
+            # set div(scal.var1) operator
+            self.div = DivV(topo, method='FD_C_4_CAA_work')
+
+        elif method.find('FD_C_4_CAA') >= 0:
+            # - 4th order FD,
+            # - local workspace (memshape required)
+            # - fd.compute_and_add method
+
+            # allocate local workspace
+            if memshape is None:
+                raise ValueError("memshape required for this method")
+
+            # Max nb of components of input fields.
+            self.nbComponents = len(memshape)
+            self._WORKLENGTH = 1
+            # allocate local workspace
+            self._work = [npw.zeros(memshape)
+                          for i in xrange(self._WORKLENGTH)]
+
+            # connect call function
+            self.fcall = self.FDCentral4
+            # set div(scal.var1) operator
+            self.div = DivV(topo, method='FD_C_4_CAA_work')
+
         else:
+            # - 4th order FD,
+            # - local workspace (memshape required)
+            # - fd.compute method
+
+            # allocate local workspace
+            if memshape is None:
+                raise ValueError("memshape required for this method")
+
+            # Max nb of components of input fields.
+            self.nbComponents = len(memshape)
+            if self.nbComponents == 1:  # 1D case
+                self._WORKLENGTH = 1
+            else:
+                self._WORKLENGTH = 2
+            # allocate local workspace
+            self._work = [npw.zeros(memshape)
+                          for i in xrange(self._WORKLENGTH)]
+
+            # connect call function
             self.fcall = self.FDCentral4
-            self.div = DivV(topo, method='FD_C4')
-            self.divZ = DivV(topo, method='FD_C4_nowork')
-        self.nbComponents = topo.domain.dimension
+            # set div(scal.var1) operator
+            self.div = DivV(topo, method='FD_C_4_work')

-    def __call__(self, var1, var2, data=None):
-        return self.fcall(var1, var2, data)
+    def __call__(self, var1, var2, result, work=None):
+        return self.fcall(var1, var2, result, work)

-    def FDCentral4(self, var1, var2, data=None):
+    def FDCentral4(self, var1, var2, result, work=None):
         """
         """
-        # init (if required) return data
-        if data is None:
-            data = []
-            for i in xrange(self.nbComponents):
-                data.append(np.zeros_like(var1[i]))
-
-        # we use data[Y] and data[Z] as work ...
-        data[XDIR] = self.div(var1, var2[XDIR],
-                              data=data[XDIR], work=data[YDIR])
-        data[YDIR] = self.div(var1, var2[YDIR],
-                              data=data[YDIR], work=data[ZDIR])
-        # No work vector for zdir
-        data[ZDIR] = self.divZ(var1, var2[ZDIR], data=data[ZDIR])
-        return data
+        assert len(result) == len(var1)
+
+        # Note FP var1[dir] and var2[dir] must be different from result.
+        # This is checked inside the DivV call.
+
+        for cdir in xrange(len(var1)):
+            result[cdir] = self.div(var1, var2[cdir],
+                                    result=result[cdir], work=self._work)
+
+        return result
+
+    def FDCentral4_with_work(self, var1, var2, result, work):
+        """
+        """
+        assert len(result) == len(var1)
+
+        # indeed 1 is enough for 1D component fields ...
+
+        # Note FP var1[dir] and var2[dir] must be different from result.
+        # This is checked inside the DivV call.
+
+        for cdir in xrange(len(var1)):
+            result[cdir] = self.div(var1, var2[cdir],
+                                    result=result[cdir], work=work)
+
+        return result
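+
+# A minimal usage sketch for DivT ('topo', 'W' and 'V' -- two lists of three
+# numpy arrays -- are assumptions, not defined in this file) :
+#
+#    divT = DivT(topo, memshape=W[0].shape)    # default method, FD_C_4
+#    result = [npw.zeros_like(W[i]) for i in xrange(3)]
+#    result = divT(W, V, result)               # result[i] = div(V[i] * W)
+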
 class GradS(DifferentialOperation):
@@ -197,8 +471,8 @@
         else:
             raise ValueError("FD scheme Not yet implemented")

-        self.indices = topo.mesh.iCompute
-        self.fd_scheme.computeIndices(self.indices)
+        self._indices = topo.mesh.iCompute
+        self.fd_scheme.computeIndices(self._indices)
         assert (topo.ghosts >= 2).all(),\
             'you need a ghost layer for FD4 scheme.'
         ## number of components for vector fields
@@ -215,15 +489,15 @@
         if data is None:
             data = []
             for i in xrange(self.nbComponents):
-                data.append(np.zeros_like(scal))
+                data.append(npw.zeros_like(scal))

         #data[...] = 0.0
         # d/dx (scal), saved in data
-        self.fd_scheme.inplace(scal, XDIR, data[XDIR])
+        self.fd_scheme.compute(scal, XDIR, data[XDIR])
         # d/dy (scal), saved in data
-        self.fd_scheme.inplace(scal, YDIR, data[YDIR])
+        self.fd_scheme.compute(scal, YDIR, data[YDIR])
         # d/dz(scal), saved in data
-        self.fd_scheme.inplace(scal, ZDIR, data[ZDIR])
+        self.fd_scheme.compute(scal, ZDIR, data[ZDIR])

         return data
@@ -238,8 +512,6 @@ class GradVxW(DifferentialOperation):
         else:
             self.fcall = self.FDCentral4
             self.grad = GradS(topo, method='FD_C4')
-
-        self.divZ = DivV(topo, method='FD_C4_nowork')
         self.nbComponents = topo.domain.dimension
         if self.nbComponents == 1:
@@ -260,7 +532,7 @@
         if data is None:
             data = []
             for i in xrange(self.worklength):
-                data.append(np.zeros_like(var1[0]))
+                data.append(npw.zeros_like(var1[0]))
         else:
             assert len(data) == self.worklength
         if diagnostics is not None:
@@ -299,7 +571,7 @@ class BlockProd(object):
     def __call__(self, list1, list2):
         # assert len(list1) == len(list2)
-        data = np.zeros_like(list1[0])
+        data = npw.zeros_like(list1[0])
         for i in xrange(len(list1)):
             data += list1[i] * list2[i]
diff --git a/HySoP/hysop/numerics/finite_differences.py b/HySoP/hysop/numerics/finite_differences.py
index 7f2ebf3e9d4ed8c4dcb70f504bb6a7ccd4d596fc..8e086051b45c150ac8f29faf850e72b593b09dec 100644
--- a/HySoP/hysop/numerics/finite_differences.py
+++ b/HySoP/hysop/numerics/finite_differences.py
@@ -24,6 +24,25 @@ class FiniteDifference(object):
 class FD_C_4(FiniteDifference):
     """
     Centered scheme, 4th order.
+
+    usage :
+    # a step from domain discretisation description
+    step = topo.mesh.space_step
+    # declare scheme
+    scheme = FD_C_4(step)
+    # init scheme using local grid points indices
+    scheme.computeIndices(topo.mesh.iCompute)
+    # Then compute : result = fd(tab)
+    scheme.compute(tab, dir, result)
+    # or : result = result + fd(tab)
+    scheme.compute_and_add(tab, dir, result)
+
+    Notes FP :
+    The compute method is much more efficient than compute_and_add
+    and needs less memory. But in some cases (when fd results need
+    to be accumulated into a field) compute_and_add may be useful
+    to limit memory usage, if required.
+    See Global_tests/testPerfAndMemForFD_and_div.py for perf results.
     """

     def __init__(self, step):
@@ -72,14 +91,17 @@
                                      self.indices[dim].step)
         #return self._m2, self._m1, self._a1, self._a2

-    def inplace(self, tab, cdir, result):
+    def compute(self, tab, cdir, result):
         """
         Compute \f$ \frac{\partial tab()}{\partial cdir} \f$ using fd scheme,
-        The result is saved in input/ouput parameter result (in place!)
+        The result is saved in input/output parameter result.
         @param tab : a numpy array
         @param cdir : direction of differentiation
         @param result : input/output numpy array to save the resulting data.
         """
+        assert result is not tab
+        assert result.__class__ is np.ndarray
+        assert tab.__class__ is np.ndarray
         result[self.indices] = tab[self._a1[cdir]]
         result[self.indices] -= tab[self._m1[cdir]]
         result[self.indices] *= 8
@@ -87,14 +109,18 @@
         result[self.indices] -= tab[self._a2[cdir]]
         result[self.indices] *= self._coeff[cdir]

-    def accumulate(self, tab, cdir, result):
+    def compute_and_add(self, tab, cdir, result):
         """
         Compute \f$ \frac{\partial tab()}{\partial cdir} \f$ using fd scheme,
         The result is ADDED to input/ouput parameter result.
         @param tab : a numpy array
         @param cdir : direction of differentiation
         @param result : input/output numpy array to save the resulting data.
+ + Note FP : tab == result is allowed. """ + assert result.__class__ is np.ndarray + assert tab.__class__ is np.ndarray result[self.indices] += self._coeff[cdir] * (8 * (tab[self._a1[cdir]] - tab[self._m1[cdir]]) + diff --git a/HySoP/hysop/numerics/integrators/odesolver.py b/HySoP/hysop/numerics/integrators/odesolver.py index 57cb0832e96fa1bd93b68eae6bafb722b5cdd88d..c4b62635850d06a406c03f4698096ef8144aeedd 100644 --- a/HySoP/hysop/numerics/integrators/odesolver.py +++ b/HySoP/hysop/numerics/integrators/odesolver.py @@ -20,7 +20,8 @@ class ODESolver(NumMethod): __metaclass__ = ABCMeta @abstractmethod - def __init__(self, nbComponents, f=None, optim=None, is_rhs_has_work=False): + def __init__(self, nbComponents, f=None, optim=None, + is_rhs_has_work=False): """ @param nbComponents: number of components of the right-hand side. @param f : function f(t,y), right hand side of the equation to solve. @@ -57,7 +58,6 @@ class ODESolver(NumMethod): if optim is None: self._fcall = self._basic elif optim is LEVEL1: - print "optimisation ... " self._fcall = self._with_work elif optim is WITH_GUESS: self._fcall = self._with_guess diff --git a/HySoP/hysop/numerics/integrators/runge_kutta3.py b/HySoP/hysop/numerics/integrators/runge_kutta3.py index cb30b0b83e7906a44329a2493d005fc45a2ad2cf..2c5d596bc1b19fc64838953548708861ef129e6a 100755 --- a/HySoP/hysop/numerics/integrators/runge_kutta3.py +++ b/HySoP/hysop/numerics/integrators/runge_kutta3.py @@ -12,15 +12,16 @@ class RK3(ODESolver): """ ODESolver implementation for solving an equation system with RK3 method. """ - def __init__(self, nbComponents, f=None, optim=None, is_rhs_has_work=False): + def __init__(self, nbComponents, f=None, optim=None, + is_rhs_has_work=False): """ @param f function f(t,y) : Right hand side of the equation to solve. @param nbComponents : dimensions @param optim : to choose the level of optimization (memory management). - @param is_rhs_has_work : flag for pass a work list as last argument - for function f. Default = None. See parmepy.numerics.integrators for details. - """ + @param is_rhs_has_work : true if a 'work' list is present + as last argument of rhs function f. + """ ODESolver.__init__(self, nbComponents, f, optim=optim, is_rhs_has_work=is_rhs_has_work) self.name = 'RK3' @@ -93,7 +94,6 @@ class RK3(ODESolver): # k2 = f(t+dt/3, y + dt/3 *k1)) # k3 = f(t + 2/3 *dt , y + 2/3 dt * k2)) # result = y + 0.25 * dt * (k1 + 3 * k3) - # yn [np.add(y[i], work0[i] * dt / 3, yn[i]) for i in xrange(self._nbComponents)] diff --git a/HySoP/hysop/operator/discrete/stretching.py b/HySoP/hysop/operator/discrete/stretching.py index 961b30b2497a254b4a2f068c7089e75410da7479..4e0b70c7bcfceafc59fa16d7122c5f583ca8dd73 100755 --- a/HySoP/hysop/operator/discrete/stretching.py +++ b/HySoP/hysop/operator/discrete/stretching.py @@ -14,11 +14,17 @@ from parmepy.numerics.integrators.runge_kutta4 import RK4 from parmepy.tools.timers import timed_function from parmepy.numerics.differential_operations import DivT, GradVxW import parmepy.tools.numpywrappers as npw -import numpy as np import math ceil = math.ceil +def funcTest(var1, var2, data): + data[0][...] = var2[0] + data[1][...] = var2[1] + data[2][...] 
= var2[2]
+    return data
+
+
 class ConservativeStretching(DiscreteOperator):
     """
     Discretisation of the following problem :
@@ -26,7 +32,7 @@
     """
     @debug
-    def __init__(self, velocity, vorticity, method=None):
+    def __init__(self, velocity, vorticity, method='FD_C_4'):
         """
         @param velocity : discrete field
         @param vorticity : discrete field
@@ -49,13 +55,12 @@
         # We must have a topo for this operator ...

         # A function to compute the gradient of a vector field
-        self.divFunc = DivT(self.velocity.topology,
-                            method=method)
+        self.divFunc = DivT(self.velocity.topology, method=method,
+                            memshape=self.velocity.data[0].shape)

         # Time integration scheme.
         def rhs(t, y, result):
-            result = self.divFunc(self.velocity.data, y, data=result)
-            return result
+            return self.divFunc(y, self.velocity.data, result)

         shape = self.velocity.data[0].shape
         self.nbc = 3  # Stretching only in 3D and for vector fields.
@@ -85,9 +91,7 @@
                            for i in xrange(3 * self.nbc)]

         # Create the time integrator
-#        self.timeIntegrator = self.num_method(self.vorticity.nbComponents,
-#                                              rhs, optim=WITH_GUESS)
-        self.timeIntegrator = self.num_method(self.vorticity.nbComponents, rhs)
+        self.timeIntegrator = self.num_method(self.nbc, rhs, optim=WITH_GUESS)

     @timed_function
     def apply(self, simulation):
@@ -98,16 +102,16 @@
         dt = simulation.timeStep
         # current time
         t = simulation.time
-        # Call time integrator
-        self.vorticity.data[0][...], self.vorticity.data[1][...], \
-            self.vorticity.data[2][...] = \
-            self.timeIntegrator(t, self.vorticity.data, dt)
-#        self._workspace[:self.nbc] = \
-#            self.timeIntegrator.f(t, self.vorticity.data,
-#                                  self._workspace[:self.nbc])
-#        self.vorticity.data = self.timeIntegrator(t, self.vorticity.data, dt,
-#                                                  result=self.vorticity.data,
-#                                                  work=self._workspace)
+        # - Call time integrator -
+        # Init workspace with a first evaluation of the
+        # rhs of the integrator
+        self._workspace[:self.nbc] = \
+            self.timeIntegrator.f(t, self.vorticity.data,
+                                  self._workspace[:self.nbc])
+        # perform integration and save result in-place
+        self.vorticity.data = self.timeIntegrator(t, self.vorticity.data, dt,
+                                                  work=self._workspace,
+                                                  result=self.vorticity.data)


 class GradStretching(DiscreteOperator):
@@ -146,7 +150,8 @@
         # Time integration scheme.
         def rhs(t, y, result):
-            result, diag = self.graduv(self.velocity.data, y)
+            result, diagnostics =\
+                self.graduv(self.velocity.data, y)
             return result

         # Todo : check fd call with numpy and/or parmes fields.
@@ -168,15 +174,21 @@
         else:
             self.num_method = RK3
             self.cststretch = 2.5127
+        shape = self.velocity.data[0].shape
+        self.nbc = 3  # Stretching only in 3D and for vector fields.
+        self._workspace = [npw.zeros(shape)
+                           for i in xrange(3 * self.nbc)]
         ## time integrator
-        self.timeIntegrator = self.num_method(self.vorticity.nbComponents, rhs)
+        self.timeIntegrator = self.num_method(self.vorticity.nbComponents, rhs,
+                                              optim=WITH_GUESS)
         ## a vector of float with diagnostics computed
         ## from GradVxW (max div ...)
- self.diagnostics = np.ones((5)) + self.diagnostics = npw.ones((5)) @timed_function def apply(self, simulation): + ind = self.velocity.topology.mesh.iCompute if simulation is None: raise ValueError("Missing simulation value for computation.") @@ -184,16 +196,30 @@ class GradStretching(DiscreteOperator): dt = simulation.timeStep # current time t = simulation.time - self.diagnostics = self.graduv(self.velocity, self.vorticity, self.diagnostics)[1] + #self.diagnostics = self.graduv(self.velocity, self.vorticity, self.diagnostics)[1] # Compute the number of required subcycles ndt, subdt = self.checkStability(dt) # Call time integrator print "time steps ...", dt, subdt + print "pre apply Grad...", self.vorticity.data[0].max() + assert sum(subdt) == dt for i in xrange(ndt): - self.vorticity.data[0], self.vorticity.data[1],\ - self.vorticity.data[2] =\ - self.timeIntegrator(t, self.vorticity.data, subdt[i]) + print "Update workspace/Grad, pre", self._workspace[0][ind].max() + self._workspace[:self.vorticity.nbComponents] = \ + self.timeIntegrator.f(t, self.vorticity.data, + self._workspace + [:self.vorticity.nbComponents]) + print "Update workspace/Grad, post", self._workspace[0][ind].max() + # perform integration and save result in-place + self.vorticity.data = self.timeIntegrator( + t, self.vorticity.data, + subdt[i], + work=self._workspace, + result=self.vorticity.data) + #self.vorticity.data[0], self.vorticity.data[1],\ + # self.vorticity.data[2] =\ + # self.timeIntegrator(t, self.vorticity.data, subdt[i]) def checkStability(self, dt): """ @@ -205,7 +231,7 @@ class GradStretching(DiscreteOperator): """ dt_stab = min(dt, self.cststretch / self.diagnostics[1]) nb_cycles = int(ceil(dt / dt_stab)) - subdt = np.zeros((nb_cycles)) + subdt = npw.zeros((nb_cycles)) subdt[:] = dt_stab subdt[-1] = dt - (nb_cycles - 1) * dt_stab return nb_cycles, subdt diff --git a/HySoP/hysop/operator/energy_enstrophy.py b/HySoP/hysop/operator/energy_enstrophy.py index 22208d0ccdfed5d10478a264a4d5233f2e1b6ff3..845736c1b6a730572ce328cc62a784144e7c3dbf 100644 --- a/HySoP/hysop/operator/energy_enstrophy.py +++ b/HySoP/hysop/operator/energy_enstrophy.py @@ -6,7 +6,7 @@ Compute Energy and Enstrophy import numpy as np from parmepy.constants import debug, PARMES_MPI_REAL from parmepy.operator.monitors.monitoring import Monitoring -from parmepy.mpi import main_rank, main_size, MPI +from parmepy.mpi import main_rank, MPI from parmepy.tools.timers import timed_function diff --git a/HySoP/hysop/problem/problem.py b/HySoP/hysop/problem/problem.py index db8783c6d8f0fad46a7208511e0ee65ade13d6f0..68b6efade181691c2cd6a4f79ed1b86433b3acc5 100644 --- a/HySoP/hysop/problem/problem.py +++ b/HySoP/hysop/problem/problem.py @@ -3,14 +3,12 @@ Complete problem description. """ -from parmepy.constants import debug +from parmepy.constants import debug, parmesPickle from parmepy import __VERBOSE__ from parmepy.operator.monitors.monitoring import Monitoring -from parmepy.operator.continuous import Operator from parmepy.operator.redistribute import Redistribute from parmepy.tools.timers import Timer, timed_function from parmepy.mpi import main_rank -from scitools.NumPyDB import NumPyDB_cPickle class Problem(object): @@ -205,7 +203,7 @@ class Problem(object): if filename is not None: self.filename = filename self._filedump = filename + '_rk_' + str(main_rank) - db = NumPyDB_cPickle(self._filedump, mode='store') + db = parmesPickle(self._filedump, mode='store') db.dump(self.simulation, 'simulation') for v in self.input: print 'dump v ...' 
@@ -224,7 +222,7 @@
         if filename is not None:
             self.filename = filename
         self._filedump = filename + '_rk_' + str(main_rank)
-        db = NumPyDB_cPickle(self._filedump, mode='load')
+        db = parmesPickle(self._filedump, mode='load')
         self.simulation = db.load('simulation')[0]
         self.simulation.reset()
         for v in self.input:
diff --git a/HySoP/setup.py.in b/HySoP/setup.py.in
index 2db7c5451fd6eb392f4982e0ece77532c5c240dd..3c75c97226a0edb1984fd3a8d8261a6be509fcda 100644
--- a/HySoP/setup.py.in
+++ b/HySoP/setup.py.in
@@ -28,7 +28,7 @@ packages = ['parmepy',
 packages_for_tests = ['parmepy.domain.tests',
                       'parmepy.fields.tests',
                       'parmepy.operator.tests',
+                      'parmepy.numerics.tests',
                       'parmepy.tools.tests',
                       'parmepy.problem.tests',
-                      'parmepy.numerics.tests',
diff --git a/HySoP/src/fftw/fft3d.f90 b/HySoP/src/fftw/fft3d.f90
index 5a4a8878acab95f16fbd30b14bbc24b745064de2..3adfe7d7e8e242642b47a4425c6b563d5e3132f0 100755
--- a/HySoP/src/fftw/fft3d.f90
+++ b/HySoP/src/fftw/fft3d.f90
@@ -296,7 +296,7 @@ contains
     call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1)
     call fftw_mpi_execute_dft_r2c(plan_forward2, rdatain2, dataout2)
     call fftw_mpi_execute_dft_r2c(plan_forward3, rdatain3, dataout3)
-    print *, "r2c time = ", MPI_WTIME() - start
+    !!print *, "r2c time = ", MPI_WTIME() - start

   end subroutine r2c_3d
@@ -312,7 +312,7 @@ contains
     call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1)
     call fftw_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2)
     call fftw_mpi_execute_dft_c2r(plan_backward3,dataout3,rdatain3)
-    print *, "c2r time : ", MPI_WTIME() -start
+!!  print *, "c2r time : ", MPI_WTIME() -start
     ! copy back to velocity and normalisation
     do k =1, local_resolution(c_Z)
       do j = 1, fft_resolution(c_Y)
@@ -411,7 +411,7 @@ contains
     ! compute transform (as many times as desired)
     start = MPI_WTIME()
     call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain_many, dataout_many)
-    print *, "r2c time = ", MPI_WTIME() - start
+!!  print *, "r2c time = ", MPI_WTIME() - start

   end subroutine r2c_3d_many
@@ -426,7 +426,7 @@ contains
     start = MPI_WTIME()
     call fftw_mpi_execute_dft_c2r(plan_backward1,dataout_many,rdatain_many)
-    print *, "c2r time : ", MPI_WTIME() -start
+!!  print *, "c2r time : ", MPI_WTIME() -start
     do k =1, local_resolution(c_Z)
       do j = 1, fft_resolution(c_Y)
         do i = 1, fft_resolution(c_X)