diff --git a/Examples/Multiphase/NS_planeJet_hybrid_MS_MP.py b/Examples/Multiphase/NS_planeJet_hybrid_MS_MP.py new file mode 100644 index 0000000000000000000000000000000000000000..912aa6cf966b226da2d4dc89f6a9a768b2c1c541 --- /dev/null +++ b/Examples/Multiphase/NS_planeJet_hybrid_MS_MP.py @@ -0,0 +1,698 @@ +#!/usr/bin/env python +# Scripts arguments: +# 1. Flow resolution +# 2. Scalar resolution +# 3. Dictionary for devices id: (mpi rank: device id) +# 4. Is the initial condition is perturbed +# 5. Is data output +# 6. Flow density +# 7. Jet density +# mpirun -np 10 python ./NS_planeJet_hybrid_MS_MP.py "[129,129,129]" "[257,257,257]" "{0:0,5:1}" "True" "True" "1" "3" +import sys +USER_NB_ELEM_UW = eval(sys.argv[1]) +USER_NB_ELEM_S = eval(sys.argv[2]) +USER_RANK_DEVICE_ID = eval(sys.argv[3]) +RANDOM_INIT = eval(sys.argv[4]) +IS_OUTPUT = eval(sys.argv[5]) +FLOW_DENSITY = eval(sys.argv[6]) +JET_DENSITY = eval(sys.argv[7]) +import hysop +import hysop.gpu +#hysop.gpu.CL_PROFILE = True +from hysop.constants import np, HDF5, ASCII, HYSOP_MPI_REAL +from hysop.mpi.main_var import MPI, main_size, main_rank, main_comm +from hysop.tools.parameters import MPIParams, Discretization, IOParams +from hysop.problem.simulation import Simulation +from hysop.fields.variable_parameter import VariableParameter +from hysop.methods_keys import TimeIntegrator, Interpolation, Remesh,\ + Support, Splitting, MultiScale, MultiScale, SpaceDiscretisation, \ + GhostUpdate, Scales, dtCrit, ExtraArgs +from hysop.numerics.integrators.runge_kutta2 import RK2 as RK +from hysop.numerics.integrators.runge_kutta3 import RK3 as RK3 +from hysop.numerics.finite_differences import FD_C_4 +from hysop.numerics.remeshing import L6_4 as remesh_formula +from hysop.numerics.remeshing import L2_1 +from hysop.numerics.remeshing import Linear +from hysop.operator.advection import Advection +from hysop.operator.diffusion import Diffusion +from hysop.operator.stretching import Stretching +from hysop.operator.poisson import Poisson +from hysop.operator.differential import Curl +from hysop.operator.adapt_timestep import AdaptTimeStep +from hysop.operator.custom import CustomMonitor +from hysop.operator.redistribute_inter import RedistributeInter +from hysop.operator.redistribute_intra import RedistributeIntra +from hysop.gpu.gpu_transfer import DataTransfer +from hysop.domain.subsets import SubBox +from hysop.operator.hdf_io import HDF_Writer +from hysop.operator.energy_enstrophy import EnergyEnstrophy +import hysop.tools.numpywrappers as npw +from hysop.tools.profiler import Profiler, FProfiler +from hysop.tools.io_utils import IO +from hysop.operator.baroclinic_from_rhs import BaroclinicFromRHS +from hysop.operator.multiresolution_filter import MultiresolutionFilter +from hysop.operator.multiphase_gradp import MultiphaseGradP +from hysop.operator.multiphase_baroclinic_rhs import MultiphaseBaroclinicRHS +from hysop.operator.spectrum import Spectrum +IO.set_default_path('/scratch_p/jmetancelin/PlaneJet_F{0}_J{1}'.format(FLOW_DENSITY,JET_DENSITY)) + +pi = np.pi +cos = np.cos +sin = np.sin +exp = np.exp +abs = np.abs +tanh = np.tanh + + +TASK_UW = 1 +TASK_SCALAR = 2 +PROC_TASKS = [TASK_UW, ] * main_size +for p in USER_RANK_DEVICE_ID: + PROC_TASKS[p] = TASK_SCALAR +try: + DEVICE_ID = USER_RANK_DEVICE_ID[main_rank] +except KeyError: + DEVICE_ID = None +out_freq = 10 +# Physical parameters: +# Flow viscosity +VISCOSITY = 1e-4 +# Schmidt number +SC = ((1. * USER_NB_ELEM_S[0] - 1.)/(1. 
* USER_NB_ELEM_UW[0] - 1.))**2 +# Scalar diffusivity +DIFF_COEFF_SCAL = VISCOSITY / SC + +width = 0.01 +ampl3 = 0.3 +ampl = 0.05 +ampl2 = 0.05 + + +ctime = MPI.Wtime() +setup_time = FProfiler("Setup") + +# Domain +box = hysop.Box(length=[1., 1., 1.], origin=[0., 0., 0.], + proc_tasks=PROC_TASKS) +mpi_params = MPIParams(comm=box.comm_task, task_id=PROC_TASKS[main_rank]) +mpi_params_S = MPIParams(comm=box.comm_task, task_id=TASK_SCALAR) +mpi_params_UW = MPIParams(comm=box.comm_task, task_id=TASK_UW) + + +def computeVel(res, x, y, z, t): + yy = abs(y - 0.5) + aux = (0.1 - 2. * yy) / (4. * width) + strg1 = exp(-abs(aux ** 2)) + strg2 = exp(-abs(aux ** 2)) + strg3 = exp(-abs(aux ** 2)) + if RANDOM_INIT: + from create_random_arrays import random_init + randX, randY, randZ = random_init(res[0].shape, box.comm_task) + strg1 = exp(-abs(aux ** 2)) * randX + strg2 = exp(-abs(aux ** 2)) * randY + strg3 = exp(-abs(aux ** 2)) * randZ + else: + strg1 = 0. + strg2 = 0. + strg3 = 0. + res[0][...] = 0.5 * (1. + tanh(aux)) + res[0][...] *= (1. + ampl3 * sin(8. * pi * x)) + res[0][...] *= (1. + ampl * strg1) + res[1][...] = ampl * strg2 + res[2][...] = ampl * strg3 + return res + + + +def initScal(res, x, y, z, t): + yy = abs(y - 0.5) + aux = (0.1 - 2. * yy) / (4. * width) + res[0][...] = 0.5 * (1. + tanh(aux)) + res[0][...] *= (1. + ampl3 * sin(8. * pi * x)) + return res + +temp_maxvelo = npw.zeros((3, )) +maxvelo_values = npw.zeros((3, )) + + +def calc_maxvelo(simu, v): + temp_maxvelo[0] = np.max(np.abs(v[0].data[0])) + temp_maxvelo[1] = np.max(np.abs(v[0].data[1])) + temp_maxvelo[2] = np.max(np.abs(v[0].data[2])) + v[0].topology.comm.Allreduce(sendbuf=[temp_maxvelo, 3, HYSOP_MPI_REAL], + recvbuf=[maxvelo_values, 3, HYSOP_MPI_REAL], + op=MPI.MAX) + return maxvelo_values + + +# Fields +velo = hysop.Field(domain=box, formula=computeVel, + name='Velocity', is_vector=True) +vorti = hysop.Field(domain=box, + name='Vorticity', is_vector=True) +scal = hysop.Field(domain=box, formula=initScal, + name='Scalar', is_vector=False) +gradp = hysop.Field(domain=box, + name='GradP', is_vector=True) +baroclinic_rhs = hysop.Field(domain=box, + name='B_rhs', is_vector=True) + + +data = {'dt': 0.01} +dt = VariableParameter(data) +simu = Simulation(tinit=0.0, tend=5., timeStep=0.01, iterMax=1000) + +# Flow discretizations: +d_F_0g = Discretization(USER_NB_ELEM_UW) +d_F_2g = Discretization(USER_NB_ELEM_UW, [2, ] * 3) +# Scalar discretization +d_S_0g = Discretization(USER_NB_ELEM_S) +# Velocity discretization for scalar advection +if USER_NB_ELEM_UW[0] == USER_NB_ELEM_S[0]: + d_F_1g = Discretization(USER_NB_ELEM_UW) +else: + d_F_1g = Discretization(USER_NB_ELEM_UW, [1, ] * 3) + +# Topologies +topo_S_0g_1d = None +topo_F_1g_1d = None +topo_F_0g_1d = None +topo_F_2g_1d = None +topo_F_0g_2d = None +topo_F_0g_2d = box.create_topology( + d_F_0g, dim=2, mpi_params=mpi_params) +topo_F_2g_1d = box.create_topology( + d_F_2g, dim=1, mpi_params=mpi_params) +topo_F_0g_1d = box.create_topology( + d_F_0g, dim=1, mpi_params=mpi_params) +topo_F_1g_1d = box.create_topology( + d_F_1g, dim=1, mpi_params=mpi_params) +topo_S_0g_1d = box.create_topology( + d_S_0g, dim=1, mpi_params=mpi_params) + + +# Operators +# GPU operators +advec_scal_method = {TimeIntegrator: RK, + Interpolation: Linear, + Remesh: remesh_formula, + Support: 'gpu_2k', + Splitting: 'o2', + MultiScale: Linear} +if PROC_TASKS.count(TASK_SCALAR) == 1: + advec_scal_method[ExtraArgs] = {'device_id': DEVICE_ID, + 'device_type': 'gpu'} +else: + advec_scal_method[ExtraArgs] = 
{'max_velocity': [1.2, 0.7, 0.7], + 'max_dt': 0.012, + 'device_id': DEVICE_ID, + 'device_type': 'gpu', + 'velocity_only_on_device': False} +advec_scal = Advection(velo, + discretization=topo_F_1g_1d, + variables={scal: topo_S_0g_1d}, + mpi_params=mpi_params_S, + method=advec_scal_method) +# diffusion_scal = Diffusion(viscosity=DIFF_COEFF_SCAL, +# vorticity=scal, +# discretization=topo_S_0g_1d, +# mpi_params=mpi_params_S, +# method={Support: 'gpu', +# SpaceDiscretisation: 'fd', +# ExtraArgs: {'device_id': DEVICE_ID, +# 'device_type': 'gpu'}}) +# diffusion_scal.name += '_(Scalar)' +# tanh(x) is in [-1:1] and -1 stand for the flow. +# We want [FLOW_DENSITY:JET_DENSITY] if FLOW_DENSITY<JET_DENSITY +# We want [JET_DENSITY:FLOW_DENSITY] if FLOW_DENSITY>JET_DENSITY +if FLOW_DENSITY < JET_DENSITY: + scal_to_rho = '{0:f}*(0.5*tanh(100.0*x-50.0)+0.5)+{1:f}'.format( + abs(FLOW_DENSITY - JET_DENSITY), + min(FLOW_DENSITY, JET_DENSITY)) +else: + scal_to_rho = '{0:f}*(-0.5*tanh(100.0*x-50.0)+0.5)+{1:f}'.format( + abs(FLOW_DENSITY - JET_DENSITY), + min(FLOW_DENSITY, JET_DENSITY)) +print +baroclinic_rhs_op = MultiphaseBaroclinicRHS( + baroclinic_rhs, scal, gradp, + variables={baroclinic_rhs: topo_S_0g_1d, + gradp: topo_F_2g_1d, + scal: topo_S_0g_1d}, + method={Support: 'gpu', + SpaceDiscretisation: FD_C_4, + ExtraArgs: {'density_func': scal_to_rho, + 'device_id': DEVICE_ID, + 'device_type': 'gpu'}}, + mpi_params=mpi_params_S) +filter_scal = MultiresolutionFilter( + d_in=topo_S_0g_1d, d_out=topo_F_2g_1d, + variables={baroclinic_rhs: topo_F_2g_1d}, + method={Remesh: L2_1, + Support: 'gpu', + ExtraArgs: {'device_id': DEVICE_ID, + 'device_type': 'gpu'}}, + mpi_params=mpi_params_S) +gradp_op = MultiphaseGradP(velocity=velo, gradp=gradp, viscosity=VISCOSITY, + discretization=topo_F_2g_1d, + mpi_params=mpi_params_S, + method={SpaceDiscretisation: FD_C_4, + ExtraArgs: {'gravity': [0., 0., 0.]}}) + +# CPU operators +advec = Advection(velo, + discretization=topo_F_0g_2d, + variables={vorti: topo_F_0g_2d}, + mpi_params=mpi_params_UW, + method={Scales: 'p_64', MultiScale: 'L4_4'}) +stretch = Stretching(velo, vorti, discretization=topo_F_2g_1d, + mpi_params=mpi_params_UW) +baroclinic = BaroclinicFromRHS(vorti, baroclinic_rhs, + discretization=topo_F_0g_1d, + mpi_params=mpi_params_UW) +diffusion = Diffusion(variables={vorti: topo_F_0g_1d}, + viscosity=VISCOSITY, + mpi_params=mpi_params_UW) +poisson = Poisson(velo, vorti, discretization=topo_F_0g_1d, + mpi_params=mpi_params_UW) +c = Curl(velo, vorti, discretization=topo_F_0g_1d, + method={SpaceDiscretisation: 'fftw', GhostUpdate: True}, + mpi_params=mpi_params_UW) +#dt_output = None +#if IS_OUTPUT: +dt_output = IOParams(frequency=1, filename='dt.dat', fileformat=ASCII) +dt_adapt = AdaptTimeStep(velo, vorti, + simulation=simu, + time_range=[0, np.infty], + discretization=topo_F_2g_1d, + method={TimeIntegrator: RK3, + SpaceDiscretisation: FD_C_4, + dtCrit: ['gradU', 'cfl', ]}, + lcfl=0.15, + cfl=1.5, + io_params=dt_output, + mpi_params=mpi_params_UW) + +# Operators discretizations +if box.is_on_task(TASK_SCALAR): + for op in (advec_scal, gradp_op, filter_scal, baroclinic_rhs_op): # , diffusion_scal): + op.discretize() +if box.is_on_task(TASK_UW): + for op in (advec, stretch, diffusion, poisson, c, dt_adapt, baroclinic): + op.discretize() + +if IS_OUTPUT: + # Defining subdomains + L_V = 1. - 1. / (1. * USER_NB_ELEM_UW[0]) + L_S = 1. - 1. / (1. 
* USER_NB_ELEM_S[0]) + XY_plane_v = SubBox(origin=[0., 0., 0.5], length=[L_V, L_V, 0.], + parent=box) + XZ_plane_v = SubBox(origin=[0., 0.5, 0.], length=[L_V, 0., L_V], + parent=box) + XY_plane_s = SubBox(origin=[0., 0., 0.5], length=[L_S, L_S, 0.], + parent=box) + XZ_plane_s = SubBox(origin=[0., 0.5, 0.], length=[L_S, 0., L_S], + parent=box) + # Defining output operators + p_velo = HDF_Writer(variables={velo: topo_F_0g_1d}, + mpi_params=mpi_params_UW, + io_params=IOParams(frequency=out_freq, + filename='flow', + fileformat=HDF5)) + p_velo_xy = HDF_Writer(variables={velo: topo_F_0g_1d, + vorti: topo_F_0g_1d}, + var_names={velo: 'Velocity', vorti: 'Vorticity'}, + subset=XY_plane_v, + mpi_params=mpi_params_UW, + io_params=IOParams(frequency=out_freq, + filename='flow_XY', + fileformat=HDF5)) + p_velo_xz = HDF_Writer(variables={velo: topo_F_0g_1d, + vorti: topo_F_0g_1d}, + var_names={velo: 'Velocity', vorti: 'Vorticity'}, + subset=XZ_plane_v, + mpi_params=mpi_params_UW, + io_params=IOParams(frequency=out_freq, + filename='flow_XZ', + fileformat=HDF5)) + p_gradp_xy = HDF_Writer(variables={gradp: topo_F_2g_1d}, + var_names={gradp: 'gradp'}, + subset=XY_plane_s, + mpi_params=mpi_params_S, + io_params=IOParams(frequency=out_freq, + filename='gradp_XY', + fileformat=HDF5)) + # p_rhs_xy = HDF_Writer(variables={baroclinic_rhs: topo_S_0g_1d}, + # var_names={baroclinic_rhs: 'RHS'}, + # subset=XY_plane_s, + # mpi_params=mpi_params_S, + # io_params=IOParams(frequency=out_freq, + # filename='rhs_XY', + # fileformat=HDF5)) + p_scal_xy = HDF_Writer(variables={scal: topo_S_0g_1d}, + var_names={scal: 'Scalar'}, + subset=XY_plane_s, + mpi_params=mpi_params_S, + io_params=IOParams(frequency=out_freq, + filename='scal_XY', + fileformat=HDF5)) + p_scal_xz = HDF_Writer(variables={scal: topo_S_0g_1d}, + var_names={scal: 'Scalar'}, + subset=XZ_plane_s, + mpi_params=mpi_params_S, + io_params=IOParams(frequency=out_freq, + filename='scal_XZ', + fileformat=HDF5)) + p_scal_xy.name += '_(Scalar)' + p_scal_xz.name += '_(Scalar)' + energy = EnergyEnstrophy(velocity=velo, + vorticity=vorti, + discretization=topo_F_0g_1d, + mpi_params=mpi_params_UW, + io_params=IOParams(frequency=1, + filename='energy.dat', + fileformat=ASCII)) + maxvelo = CustomMonitor(variables={velo: topo_F_0g_1d}, + function=calc_maxvelo, + res_shape=3, + mpi_params=mpi_params_UW, + io_params=IOParams(frequency=1, + filename='maxvelo.dat', + fileformat=ASCII)) + if box.is_on_task(TASK_UW): + for op in (p_velo, p_velo_xy, p_velo_xz, energy, maxvelo): + op.discretize() + if box.is_on_task(TASK_SCALAR): + for op in (p_scal_xy, p_scal_xz, p_gradp_xy): + op.discretize() + +# Redistribute operators +# CPU redistributes +RF_vorti_0g2d_2g1d = RedistributeIntra(variables=[vorti], # W_C_2d_to_2G + source=advec, target=stretch, + mpi_params=mpi_params_UW) +RF_vorti_0g2d_2g1d.name += '_toG_W' +RF_velo_0g1d_2g1d = RedistributeIntra(variables=[velo], # U_C_1d_to_C_2d_2G + source=poisson, target=stretch, + run_till=[stretch, dt_adapt], + mpi_params=mpi_params_UW) +RF_velo_0g1d_2g1d.name += '_toG_V' +toDev_velo_1g1d = DataTransfer(source=topo_F_1g_1d, target=advec_scal, + variables={velo: topo_F_1g_1d}, + run_till=[advec_scal, gradp_op], + mpi_params=mpi_params_S) +toDev_velo_1g1d.name += '_ToDev_V' +RS_velo_0g1d_1g1d = RedistributeIntra(variables=[velo], # U_C_1d_to_C_2d_2G + source=topo_F_0g_1d, target=topo_F_1g_1d, + run_till=[toDev_velo_1g1d], + mpi_params=mpi_params_S) +RS_velo_0g1d_1g1d.name += '_GPUtoG_V' +RF_vorti_0g1d_2g1d = 
RedistributeIntra(variables=[vorti], # W_C_1d_to_C_2d_2G + source=diffusion, target=dt_adapt, + mpi_params=mpi_params_UW) +RF_vorti_0g1d_2g1d.name += '_toG_W' +RF_velo_0g1d_0g2d = RedistributeIntra(variables=[velo], # U_C_1d_to_C_2d + source=poisson, target=advec, + mpi_params=mpi_params_UW) +RF_velo_0g1d_0g2d.name += '_toScales_V' +RF_vorti_0g1d_0g2d = RedistributeIntra(variables=[vorti], # W_C_1d_to_C_2d + source=poisson, target=advec, + mpi_params=mpi_params_UW) +RF_vorti_0g1d_0g2d.name += '_toScales_W' +RF_vorti_2g1d_0g1d = RedistributeIntra(variables=[vorti], # W_C_2d_2G_to_1d + source=baroclinic, #stretch, + target=diffusion, + mpi_params=mpi_params_UW) +RF_vorti_2g1d_0g1d.name += '_FromG_W' + +F0g1d_to_S0g1d_velo = RedistributeInter(variables=[velo], + parent=main_comm, + source=topo_F_0g_1d, target=topo_F_0g_1d, + source_id=TASK_UW, target_id=TASK_SCALAR, + run_till=[gradp_op, RS_velo_0g1d_1g1d]) +S2g1d_to_F2g1d_rhs = RedistributeInter(variables=[baroclinic_rhs], + parent=main_comm, + source=topo_F_2g_1d, target=topo_F_0g_1d, + source_id=TASK_SCALAR, target_id=TASK_UW, + run_till=[baroclinic]) +S2g1d_to_F2g1d_rhs.name += '_rhs_GPU_to_CPU' +toHost_rhs_2g1d = DataTransfer(source=topo_F_2g_1d, + target=S2g1d_to_F2g1d_rhs, + variables={baroclinic_rhs: topo_F_2g_1d}, + mpi_params=mpi_params_S) +toHost_rhs_2g1d.name += '_ToHost_RHS' +toDev_gradp_2g1d = DataTransfer(source=topo_F_2g_1d, target=baroclinic_rhs_op, + variables={gradp: topo_F_2g_1d}, + mpi_params=mpi_params_S) +toDev_gradp_2g1d.name += '_ToDev_GradP' +if IS_OUTPUT: + toHost_scal_0g1d = DataTransfer(source=topo_S_0g_1d, target=p_scal_xy, + variables={scal: topo_S_0g_1d}, + mpi_params=mpi_params_S, + freq=out_freq, + run_till=[p_scal_xz, ]) + toHost_scal_0g1d.name += '_ToHost_S' + +# Operators setup +if box.is_on_task(TASK_SCALAR): + for op in (advec_scal, gradp_op, baroclinic_rhs_op, filter_scal): # , diffusion_scal): + op.setup() + if IS_OUTPUT: + for op in (p_gradp_xy, p_scal_xy, p_scal_xz, toHost_scal_0g1d): + op.setup() + for op in (toDev_velo_1g1d, toHost_rhs_2g1d, toDev_gradp_2g1d, RS_velo_0g1d_1g1d): + op.setup() +if box.is_on_task(TASK_UW): + for op in (advec, stretch, diffusion, poisson, c, dt_adapt, baroclinic): + op.setup() + if IS_OUTPUT: + for op in (p_velo, p_velo_xy, p_velo_xz, energy, maxvelo): + op.setup() + for op in (RF_vorti_0g2d_2g1d, RF_vorti_2g1d_0g1d, RF_velo_0g1d_2g1d, RF_vorti_0g1d_2g1d, + RF_velo_0g1d_0g2d, RF_vorti_0g1d_0g2d): + op.setup() +# Wait for all operators setup before setup the intra-comm redistribute +main_comm.Barrier() +F0g1d_to_S0g1d_velo.setup() +S2g1d_to_F2g1d_rhs.setup() + +# Operators list +if IS_OUTPUT: + operators_list = [RS_velo_0g1d_1g1d, toDev_velo_1g1d, ## Scal + gradp_op, p_gradp_xy, toDev_gradp_2g1d, ## Scal + advec, RF_vorti_0g2d_2g1d, stretch, ## Flow + advec_scal,# diffusion_scal, ## Scal + toHost_scal_0g1d, p_scal_xy, p_scal_xz, ## Scal + baroclinic_rhs_op, filter_scal, toHost_rhs_2g1d, ## Scal + S2g1d_to_F2g1d_rhs, ## Scal->Flow + baroclinic, ## Flow + diffusion, poisson, ## Flow + F0g1d_to_S0g1d_velo, ## Flow->Scal + p_velo, p_velo_xy, p_velo_xz, energy, maxvelo, ## Flow + RF_velo_0g1d_2g1d, RF_vorti_0g1d_2g1d, ## Flow + RF_velo_0g1d_0g2d, RF_vorti_0g1d_0g2d, ## Flow + dt_adapt, ## Flow + ] +else: + raise RuntimeError('') + # operators_list = [F0g1d_to_S2g1d_velo, + # toDev, advec_scal, diffusion_scal, + # advec, RF_vorti_0g2d_2g1d, stretch, + # RF_vorti_2g1d_0g1d, diffusion, poisson, + # RF_velo_0g1d_2g1d, RF_velo_0g1d_0g2d, RF_vorti_0g1d_0g2d, dt_adapt] + +# Fields 
initializations +if box.is_on_task(TASK_SCALAR): + scal.initialize(topo=topo_S_0g_1d) + advec_dirX = advec_scal.advec_dir[0].discrete_op + gpu_scal = advec_dirX.fields_on_grid[0] + gpu_pscal = advec_dirX.fields_on_part[gpu_scal][0] + #diffusion_scal.discrete_op.set_field_tmp(gpu_pscal) + if IS_OUTPUT: + p_scal_xy.apply(simu) + p_scal_xz.apply(simu) +if box.is_on_task(TASK_UW): + velo.initialize(topo=topo_F_0g_1d) + c.apply(simu) + poisson.apply(simu) + RF_velo_0g1d_2g1d.apply(simu) + RF_velo_0g1d_0g2d.apply(simu) + RF_vorti_0g1d_0g2d.apply(simu) + RF_vorti_0g1d_2g1d.apply(simu) + if IS_OUTPUT: + p_velo.apply(simu) + p_velo_xy.apply(simu) + p_velo_xz.apply(simu) +F0g1d_to_S0g1d_velo.apply(simu) +F0g1d_to_S0g1d_velo.wait() +if box.is_on_task(TASK_SCALAR): + RS_velo_0g1d_1g1d.apply(simu) + RS_velo_0g1d_1g1d.wait() + toDev_velo_1g1d.apply(simu) + toDev_velo_1g1d.wait() + gradp_op.initialize_velocity() +simu.initialize() +setup_time += MPI.Wtime() - ctime +main_comm.Barrier() + +# Solve +total_time = FProfiler("Total") +solve_time = FProfiler("Solve") +cttime = MPI.Wtime() +while not simu.isOver: + ctime = MPI.Wtime() + if main_rank == 0: + simu.printState() + for op in operators_list: + if box.is_on_task(op.task_id()): + op.apply(simu) + if box.is_on_task(TASK_SCALAR): + # Wait gpu operations on scalar + advec_scal.advec_dir[0].discreteFields[scal].wait() + solve_time += MPI.Wtime() - ctime + dt_adapt.wait() + # Synchronize threads + main_comm.Barrier() + simu.advance() + +main_comm.Barrier() +nb_ite = simu.currentIteration +total_time += MPI.Wtime() - cttime +simu.finalize() + + +# if IS_OUTPUT: +# if box.is_on_task(TASK_SCALAR): +# p_scal_xy.apply(simu) +# p_scal_xz.apply(simu) +# if box.is_on_task(TASK_UW): +# p_velo.apply(simu) +# p_velo_xy.apply(simu) +# p_velo_xz.apply(simu) + + +# prof = Profiler(None, box.comm_task) +# prof += setup_time +# prof += solve_time +# for op in operators_list: +# if box.is_on_task(op.task_id()): +# op.finalize() +# prof += op.profiler +# for v in (velo, vorti, scal): +# prof += v.profiler +# prof.summarize() + +# if box.is_on_task(TASK_SCALAR): +# prof.write(prefix=' '.join([str(s-1) for s in USER_NB_ELEM_UW]) + +# ' ' + str(nb_ite) + +# ' ' + str(PROC_TASKS[main_rank]) + +# ' ' + str(PROC_TASKS.count(PROC_TASKS[main_rank])), +# hprefix='Nx Ny Nz Nite Task Np') +# main_comm.Barrier() +# if box.is_on_task(TASK_UW): +# prof.write(prefix=' '.join([str(s-1) for s in USER_NB_ELEM_UW]) + +# ' ' + str(nb_ite) + +# ' ' + str(PROC_TASKS[main_rank]) + +# ' ' + str(PROC_TASKS.count(PROC_TASKS[main_rank])), +# hprefix='Nx Ny Nz Nite Task Np') + + +# if main_rank < 2: +# print prof +# for i in xrange(main_size): +# if i == main_rank: +# print prof +# main_comm.Barrier() +# main_comm.Barrier() + +velo.finalize() +if box.is_on_task(TASK_SCALAR): + scal.finalize() +if box.is_on_task(TASK_UW): + vorti.finalize() +main_comm.Barrier() + + +# def extract_time(prof, key): +# if key is None: +# return prof.t +# if prof is None: +# return 0.0 +# return prof.profiler[key].t + +# times = npw.asrealarray([np.sum([extract_time(pf, k) for pf, k in l]) for l in ( +# ((total_time, None), ), +# ((solve_time, None), ), +# ((dt_adapt, 'apply'), ), +# ((advec, 'apply'), ), +# ((stretch, 'apply'), ), +# ((diffusion, 'apply'), ), +# ((poisson, 'apply'), ), +# ((advec_scal.advec_dir[0], 'apply'), ), +# ((advec_scal.advec_dir[0].discrete_op, 'OpenCL_copy'), +# (advec_scal.advec_dir[0].discrete_op, 'OpenCL_transpose_xy'), +# (advec_scal.advec_dir[0].discrete_op, 'OpenCL_advection_and_remeshing'), 
), +# ((advec_scal.advec_dir[1], 'apply'), ), +# ((advec_scal.advec_dir[1].discrete_op, 'OpenCL_transpose_xy'), +# (advec_scal.advec_dir[1].discrete_op, 'OpenCL_transpose_xz'), +# (advec_scal.advec_dir[1].discrete_op, 'OpenCL_advection_and_remeshing'), +# (advec_scal.advec_dir[1].discrete_op, 'OpenCL_buff_advec_and_remesh_l'), +# (advec_scal.advec_dir[1].discrete_op, 'OpenCL_buff_advec_and_remesh'), +# (advec_scal.advec_dir[1].discrete_op, 'OpenCL_buff_advec_and_remesh_r'), ), +# ((advec_scal.advec_dir[1].discrete_op, 'comm_gpu_advec_set'), +# (advec_scal.advec_dir[1].discrete_op, 'comm_cpu_advec'), +# (advec_scal.advec_dir[1].discrete_op, 'comm_cpu_advec_get'), +# (advec_scal.advec_dir[1].discrete_op, 'comm_gpu_remesh_get'), +# (advec_scal.advec_dir[1].discrete_op, 'comm_gpu_remesh_set_loc'), +# (advec_scal.advec_dir[1].discrete_op, 'comm_gpu_remesh_get_loc'), +# (advec_scal.advec_dir[1].discrete_op, 'comm_cpu_remesh'), ), +# ((advec_scal.advec_dir[2], 'apply'), ), +# ((advec_scal.advec_dir[2].discrete_op, 'OpenCL_transpose_xz'), +# (advec_scal.advec_dir[2].discrete_op, 'OpenCL_advection_and_remeshing'), +# (advec_scal.advec_dir[2].discrete_op, 'OpenCL_buff_advec_and_remesh_l'), +# (advec_scal.advec_dir[2].discrete_op, 'OpenCL_buff_advec_and_remesh'), +# (advec_scal.advec_dir[2].discrete_op, 'OpenCL_buff_advec_and_remesh_r'), ), +# ((advec_scal.advec_dir[2].discrete_op, 'comm_gpu_advec_set'), +# (advec_scal.advec_dir[2].discrete_op, 'comm_cpu_advec'), +# (advec_scal.advec_dir[2].discrete_op, 'comm_cpu_advec_get'), +# (advec_scal.advec_dir[2].discrete_op, 'comm_gpu_remesh_get'), +# (advec_scal.advec_dir[2].discrete_op, 'comm_gpu_remesh_set_loc'), +# (advec_scal.advec_dir[2].discrete_op, 'comm_gpu_remesh_get_loc'), +# (advec_scal.advec_dir[2].discrete_op, 'comm_cpu_remesh'), ), +# # ((diffusion_scal, 'apply'), ), +# # ((diffusion_scal.discrete_op, 'OpenCL_enqueue_copy'), +# # (diffusion_scal.discrete_op, 'OpenCL_diffusion'), ), +# # ((diffusion_scal.discrete_op, 'comm_diffusion'), ), +# ((RF_vorti_0g2d_2g1d, 'apply'), +# (RF_vorti_2g1d_0g1d, 'apply'), +# (RF_velo_0g1d_0g2d, 'apply'), +# (RF_vorti_0g1d_0g2d, 'apply'), +# (F0g1d_to_S2g1d_velo, 'apply'), +# (toDev_velo_1g1d, 'apply'), ), +# )]) +# tl = "Total Solve Dt Avection(w) Stretching Diffusion(w) Poisson " +# tl += "Advection(s)_X Advection(s)_CL_X " +# tl += "Advection(s)_Y Advection(s)_CL_Y Advection_Comm_Y " +# tl += "Advection(s)_Z Advection(s)_CL_Z Advection_Comm_Z " +# tl += "Diffusion(s) Diffusion(s)_CL Diffusion(s)_Comm Comm" + + +# times_task = npw.zeros_like(times) +# box.comm_task.Reduce([times, times.size, HYSOP_MPI_REAL], +# [times_task, times.size, HYSOP_MPI_REAL]) +# times_task /= (1.0 * nb_ite * PROC_TASKS.count(PROC_TASKS[main_rank])) +# if PROC_TASKS[main_rank] == TASK_UW: +# p_l = ["FLOW", ] +# if PROC_TASKS[main_rank] == TASK_SCALAR: +# p_l = ["SCALAR", ] +# p_l += [PROC_TASKS[main_rank], nb_ite] + USER_NB_ELEM_UW + USER_NB_ELEM_S +# p_l += [PROC_TASKS.count(PROC_TASKS[main_rank]), ] +# pp_l = "Label Task Ite UW_X UW_Y UW_Z S_X S_Y S_Z Np" +# for i in xrange(main_size): +# if main_rank == i: +# if i == 0: +# print pp_l + ' ' + tl +# if box.comm_task.Get_rank() == 0: +# print ' '.join([str(e) for e in p_l]) + \ +# ' ' + ' '.join([str(e) for e in times_task]) +# main_comm.Barrier() diff --git a/Examples/Multiphase/RTI.py b/Examples/Multiphase/RTI.py new file mode 100644 index 0000000000000000000000000000000000000000..9df0bb972d09f480fb68ae2599705b5d56cd570c --- /dev/null +++ b/Examples/Multiphase/RTI.py @@ -0,0 +1,541 
@@ +#!/usr/bin/env python +# Rayleigh-Taylor Instability +# Scripts arguments: +# 1. Flow resolution +# 2. Scalar resolution +# 3. Dictionary for devices id: (mpi rank: device id) +# 4. Is data output +# 5. Low density +# 6. High density +# mpirun -np 10 python ./RTI_new.py "[129,129,129]" "[257,257,257]" "{0:0,5:1}" "True" "1" "3" +import sys +USER_NB_ELEM_UW = eval(sys.argv[1]) +USER_NB_ELEM_S = eval(sys.argv[2]) +USER_RANK_DEVICE_ID = eval(sys.argv[3]) +IS_OUTPUT = eval(sys.argv[4]) +LOW_DENSITY = eval(sys.argv[5]) +HIGH_DENSITY = eval(sys.argv[6]) +import hysop +# import hysop.gpu +# hysop.gpu.CL_PROFILE = True +from hysop.constants import np, HDF5, ASCII, HYSOP_MPI_REAL +from hysop.mpi.main_var import MPI, main_size, main_rank, main_comm +from hysop.tools.parameters import MPIParams, Discretization, IOParams +from hysop.problem.simulation import Simulation +from hysop.fields.variable_parameter import VariableParameter +from hysop.methods_keys import TimeIntegrator, Interpolation, Remesh,\ + Support, Splitting, MultiScale, MultiScale, SpaceDiscretisation, \ + GhostUpdate, Scales, dtCrit, ExtraArgs +from hysop.numerics.integrators.runge_kutta2 import RK2 as RK +from hysop.numerics.integrators.runge_kutta3 import RK3 as RK3 +from hysop.numerics.finite_differences import FD_C_4 +from hysop.numerics.remeshing import L6_4 as remesh_formula +from hysop.numerics.remeshing import L2_1 +from hysop.numerics.remeshing import Linear +from hysop.operator.advection import Advection +from hysop.operator.diffusion import Diffusion +from hysop.operator.stretching import Stretching +from hysop.operator.poisson import Poisson +from hysop.operator.baroclinic_from_rhs import BaroclinicFromRHS +from hysop.operator.differential import Curl +from hysop.operator.multiresolution_filter import MultiresolutionFilter +from hysop.operator.multiphase_gradp import MultiphaseGradP +from hysop.operator.multiphase_baroclinic_rhs import MultiphaseBaroclinicRHS +from hysop.operator.adapt_timestep import AdaptTimeStep +from hysop.operator.custom import CustomMonitor +from hysop.operator.redistribute_inter import RedistributeInter +from hysop.operator.redistribute_intra import RedistributeIntra +from hysop.gpu.gpu_transfer import DataTransfer +from hysop.domain.subsets import SubBox +from hysop.operator.hdf_io import HDF_Writer +from hysop.operator.energy_enstrophy import EnergyEnstrophy +import hysop.tools.numpywrappers as npw +from hysop.tools.io_utils import IO +from hysop.operator.spectrum import Spectrum +pi = np.pi +cos = np.cos +sin = np.sin +exp = np.exp +abs = np.abs +tanh = np.tanh + +TASK_UW = 1 +TASK_SCALAR = 2 +PROC_TASKS = [TASK_UW, ] * main_size +for p in USER_RANK_DEVICE_ID: + PROC_TASKS[p] = TASK_SCALAR +try: + DEVICE_ID = USER_RANK_DEVICE_ID[main_rank] +except KeyError: + DEVICE_ID = None +print USER_RANK_DEVICE_ID +out_freq = 10 +# Physical parameters: +# Flow viscosity +VISCOSITY = 5e-5 +# Schmidt number +SC = ((1. * USER_NB_ELEM_S[0] - 1.)/(1. 
* USER_NB_ELEM_UW[0] - 1.))**2 +# Scalar diffusivity +DIFF_COEFF_SCAL = VISCOSITY / SC +MAX_DT = 0.05 # ok with 0.05 +MAX_CFL = 1.5 # ok with 1.5 (lcfl bound) +MAX_LCFL = 0.15 # ok with 0.2 +#MAX_CFL_VEC = [1.5, 0.6, 0.5] + + +width = 0.01 +ampl3 = 0.3 +ampl = 0.05 +ampl2 = 0.05 + +# NS +# CPU :: Velo(C_2d), Vorti(C_2d) > Advec > Vorti(C_2d) +# CPU :: Velo(C_2d,2G), Vorti(C_2d,2G) > Stretching > Vorti(C_2d,2G) +# CPU :: Vorti(C_2d,2G) > Penalisation > Vorti(C_2d,2G) +# CPU :: Vorti(C_1d) > Diffusion > Vorti(C_1d) +# CPU :: Vorti(C_1d) > Poisson > Velo(C_1d) +# CPU :: Velo(C_2d,2G), Vorti(C_2d,2G) > Dt +# Scal +# GPU :: Velo(C_2d,1G), Scal(F_2d) > Advec > Scal(F_2d) +# Baroclinic +# GPU :: Velo(C_2d,2G) > GradP > gradp(C_2d,2G) +# GPU :: Rho(F_2d), gradp(C_2d,2G) > BaroclinicRHS > rhs(F_2d) +# GPU :: rhs(F_2d) > Filter > rhs(C_2d,2G) +# CPU :: rhs(C_2d,2G), Vorti(C_2d,2G) > Baroclinic > Vorti(C_2d,2G) + +ctime = MPI.Wtime() + +# Domain +box = hysop.Box(length=[1., 1., 1.], origin=[0., 0., 0.], + proc_tasks=PROC_TASKS) +mpi_params = MPIParams(comm=box.comm_task, task_id=PROC_TASKS[main_rank]) +mpi_params_S = MPIParams(comm=box.comm_task, task_id=TASK_SCALAR) +mpi_params_UW = MPIParams(comm=box.comm_task, task_id=TASK_UW) + +SIGMA=1e-3 +NOISE=1e-4 + +def computeVelo(res, x, y, z, t): + zz = exp(-(z-0.5) * (z-0.5) / SIGMA) * NOISE + from create_random_arrays import random_init + randX, randY, randZ = random_init(res[0].shape, box.comm_task) + res[0][...] = zz * randX + res[1][...] = zz * randY + res[2][...] = zz * randZ + return res + +def initScal(res, x, y, z, t): + res[0][...] = z + return res + + +temp_maxvelo = npw.zeros((3, )) +maxvelo_values = npw.zeros((3, )) + + +def calc_maxvelo(simu, v): + temp_maxvelo[0] = np.max(np.abs(v[0].data[0])) + temp_maxvelo[1] = np.max(np.abs(v[0].data[1])) + temp_maxvelo[2] = np.max(np.abs(v[0].data[2])) + v[0].topology.comm.Allreduce(sendbuf=[temp_maxvelo, 3, HYSOP_MPI_REAL], + recvbuf=[maxvelo_values, 3, HYSOP_MPI_REAL], + op=MPI.MAX) + return maxvelo_values + + +# Fields +velo = hysop.Field(domain=box, formula=computeVelo, + name='Velocity', is_vector=True) +vorti = hysop.Field(domain=box, + name='Vorticity', is_vector=True) +scal = hysop.Field(domain=box, formula=initScal, + name='Scalar', is_vector=False) +gradp = hysop.Field(domain=box, + name='GradP', is_vector=True) +baroclinic_rhs = hysop.Field(domain=box, + name='B_rhs', is_vector=True) + +data = {'dt': 0.001} +dt = VariableParameter(data) +simu = Simulation(tinit=0.0, tend=2.5, timeStep=0.001, iterMax=10000) + +# Flow discretizations: +d_C = Discretization(USER_NB_ELEM_UW) +d_C_2G = Discretization(USER_NB_ELEM_UW, [2, ] * 3) +# Scalar discretization +d_F = Discretization(USER_NB_ELEM_S) + +# Topologies +topo_C_2d = None +topo_C_2d_2G = None +topo_C_1d = None +topo_F_2d = None +topo_C_2d_2G = box.create_topology( + d_C_2G, dim=1, mpi_params=mpi_params) +topo_C_2d = box.create_topology( + d_C, dim=2, mpi_params=mpi_params) +topo_C_1d = box.create_topology( + d_C, dim=1, mpi_params=mpi_params) +topo_F_2d = box.create_topology( + d_F, dim=1, mpi_params=mpi_params) +###### WARNING This topo is 1D to be fftw compliant. 
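
# A quick worked check of the Schmidt-number scaling defined above, using the
# resolutions from the sample command line (129**3 flow grid, 257**3 scalar
# grid):
#     SC = ((257. - 1.) / (129. - 1.)) ** 2 = (256. / 128.) ** 2 = 4.0
#     DIFF_COEFF_SCAL = VISCOSITY / SC = 5e-5 / 4.0 = 1.25e-05
# i.e. doubling the scalar resolution relative to the flow grid divides the
# scalar diffusivity by four.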
+ + +# Operators +# GPU operators +advec_scal_method = {TimeIntegrator: RK, + Interpolation: Linear, + Remesh: remesh_formula, + Support: 'gpu_1k', + Splitting: 'o2', + MultiScale: Linear} +if PROC_TASKS.count(TASK_SCALAR) == 1: + advec_scal_method[ExtraArgs] = {'device_id': DEVICE_ID, + 'device_type': 'gpu'} +else: + advec_scal_method[ExtraArgs] = {'max_cfl': MAX_CFL, + 'max_velocity': [2.5, 2.5, 3.5], # for rho=1<->3 + 'max_dt': MAX_DT, + 'device_id': DEVICE_ID, + 'device_type': 'gpu'} +advec_scal = Advection(velo, + discretization=topo_C_2d_2G, + variables={scal: topo_F_2d}, + mpi_params=mpi_params_S, + method=advec_scal_method) +diffusion_scal = Diffusion(viscosity=DIFF_COEFF_SCAL, + vorticity=scal, + discretization=topo_F_2d, + mpi_params=mpi_params_S, + method={Support: 'gpu', + SpaceDiscretisation: 'fd', + ExtraArgs: {'device_id': DEVICE_ID, + 'device_type': 'gpu'}}) +diffusion_scal.name += '_(Scalar)' +# tanh(x) is in [-1:1] and -1 stand for the flow. +# We want [LOW_DENSITY:HIGH_DENSITY] +scal_to_rho = '{0:f}*({1}*tanh(100.0*x-50.0)+0.5)+{2:f}'.format( + abs(LOW_DENSITY - HIGH_DENSITY), + 0.5 if LOW_DENSITY < HIGH_DENSITY else -0.5, + min(LOW_DENSITY, HIGH_DENSITY)) +baroclinic_rhs_op = MultiphaseBaroclinicRHS( + baroclinic_rhs, scal, gradp, + variables={baroclinic_rhs: topo_F_2d, + gradp: topo_C_2d_2G, + scal: topo_F_2d}, + method={Support: 'gpu', + SpaceDiscretisation: FD_C_4, + ExtraArgs: {'density_func': scal_to_rho, + 'device_id': DEVICE_ID, + 'device_type': 'gpu'}}, + mpi_params=mpi_params_S) +filter_scal = MultiresolutionFilter( + d_in=topo_F_2d, d_out=topo_C_2d_2G, + variables={baroclinic_rhs: topo_C_2d_2G}, + method={Remesh: L2_1, + Support: 'gpu', + ExtraArgs: {'device_id': DEVICE_ID, + 'device_type': 'gpu'}}, + mpi_params=mpi_params_S) +gradp_op = MultiphaseGradP(velocity=velo, gradp=gradp, viscosity=VISCOSITY, + discretization=topo_C_2d_2G, + mpi_params=mpi_params_S, + method={SpaceDiscretisation: FD_C_4, + ExtraArgs: {'gravity': [0., 0., -1.]}}) + +# CPU operators +advec = Advection(velo, + discretization=topo_C_2d, + variables={vorti: topo_C_2d}, + mpi_params=mpi_params_UW, + method={Scales: 'p_64', MultiScale: 'L4_4'}) +stretch = Stretching(velo, vorti, discretization=topo_C_2d_2G, + mpi_params=mpi_params_UW) +baroclinic = BaroclinicFromRHS(vorti, baroclinic_rhs, + discretization=topo_C_2d_2G, + mpi_params=mpi_params_UW) +diffusion = Diffusion(variables={vorti: topo_C_1d}, + viscosity=VISCOSITY, + mpi_params=mpi_params_UW) +poisson = Poisson(velo, vorti, discretization=topo_C_1d, + mpi_params=mpi_params_UW) +c = Curl(velo, vorti, discretization=topo_C_1d, + method={SpaceDiscretisation: 'fftw', GhostUpdate: True}, + mpi_params=mpi_params_UW) +#dt_output = None +#if IS_OUTPUT: +dt_output = IOParams(frequency=1, filename='dt.dat', fileformat=ASCII) +dt_adapt = AdaptTimeStep(velo, vorti, + simulation=simu, + time_range=[10, np.infty], + discretization=topo_C_2d_2G, + method={TimeIntegrator: RK3, + SpaceDiscretisation: FD_C_4, + dtCrit: ['gradU', 'cfl']}, + lcfl=MAX_LCFL, maxdt=MAX_DT, + cfl=MAX_CFL, + io_params=dt_output, + mpi_params=mpi_params_UW) + +# Operators discretizations +if box.is_on_task(TASK_SCALAR): + for op in (advec_scal, gradp_op, filter_scal, + baroclinic_rhs_op, diffusion_scal): + op.discretize() +if box.is_on_task(TASK_UW): + for op in (advec, stretch, diffusion, poisson, c, dt_adapt, baroclinic): + op.discretize() + +if IS_OUTPUT: + # Defining subdomains + L_Vx = 1. - 1. / (1. * USER_NB_ELEM_UW[0]) + L_Sx = 1. - 1. / (1. 
* USER_NB_ELEM_S[0]) + L_Vy = 1. - 1. / (1. * USER_NB_ELEM_UW[1]) + L_Sy = 1. - 1. / (1. * USER_NB_ELEM_S[1]) + L_Vz = 1. - 1. / (1. * USER_NB_ELEM_UW[2]) + L_Sz = 1. - 1. / (1. * USER_NB_ELEM_S[2]) + XY_plane_v = SubBox(origin=[0., 0., 1.], length=[L_Vx, L_Vy, 0.], + parent=box) + XZ_plane_v = SubBox(origin=[0., 0.5, 0.], length=[L_Vx, 0., L_Vz], + parent=box) + YZ_plane_s = SubBox(origin=[0.5, 0., 0.], length=[0., L_Sy, L_Sz], + parent=box) + XZ_plane_s = SubBox(origin=[0., 0.5, 0.], length=[L_Sx, 0., L_Sz], + parent=box) + # Defining output operators + p_velo = HDF_Writer(variables={velo: topo_C_1d, vorti: topo_C_1d}, + mpi_params=mpi_params_UW, + io_params=IOParams(frequency=out_freq, + filename='flow', + fileformat=HDF5)) + p_scal_yz = HDF_Writer(variables={scal: topo_F_2d}, + var_names={scal: 'Scalar'}, + subset=YZ_plane_s, + mpi_params=mpi_params_S, + io_params=IOParams(frequency=out_freq, + filename='scal_YZ', + fileformat=HDF5)) + p_scal_xz = HDF_Writer(variables={scal: topo_F_2d}, + var_names={scal: 'Scalar'}, + subset=XZ_plane_s, + mpi_params=mpi_params_S, + io_params=IOParams(frequency=out_freq, + filename='scal_XZ', + fileformat=HDF5)) + p_scal = HDF_Writer(variables={scal: topo_F_2d}, + var_names={scal: 'Scalar'}, + mpi_params=mpi_params_S, + io_params=IOParams(frequency=out_freq, + filename='scal', + fileformat=HDF5)) + p_scal_yz.name += '_(Scalar)' + p_scal_xz.name += '_(Scalar)' + p_scal.name += '_(Scalar)' + energy = EnergyEnstrophy(velocity=velo, + vorticity=vorti, + discretization=topo_C_1d, + mpi_params=mpi_params_UW, + io_params=IOParams(frequency=1, + filename='energy.dat', + fileformat=ASCII)) + maxvelo = CustomMonitor(variables={velo: topo_C_1d}, + function=calc_maxvelo, + res_shape=3, + mpi_params=mpi_params_UW, + io_params=IOParams(frequency=1, + filename='maxvelo.dat', + fileformat=ASCII)) + if box.is_on_task(TASK_UW): + for op in (p_velo, energy, maxvelo): + op.discretize() + if box.is_on_task(TASK_SCALAR): + for op in (p_scal_yz, p_scal_xz, p_scal): + op.discretize() + +# Redistribute operators +# CPU redistributes +W_C_2d_to_2G = RedistributeIntra(variables=[vorti], + source=advec, target=stretch, + mpi_params=mpi_params_UW) +W_C_2d_to_2G.name += '_W_C_2d_to_2G' +W_C_2d_2G_to_1d = RedistributeIntra(variables=[vorti], + source=baroclinic, # stretch, + target=diffusion, + mpi_params=mpi_params_UW) +W_C_2d_2G_to_1d.name += '_W_C_2d_2G_to_1d' +W_C_1d_to_C_2d_2G = RedistributeIntra(variables=[vorti], + source=diffusion, target=dt_adapt, + mpi_params=mpi_params_UW) +W_C_1d_to_C_2d_2G.name += '_W_C_1d_to_C_2d_2G' +W_C_1d_to_C_2d = RedistributeIntra(variables=[vorti], + source=diffusion, target=advec, + mpi_params=mpi_params_UW) +W_C_1d_to_C_2d.name += "_W_C_1d_to_C_2d" +U_C_1d_to_C_2d_2G = RedistributeIntra(variables=[velo], + source=poisson, target=stretch, + run_till=[stretch, dt_adapt], + mpi_params=mpi_params_UW) +U_C_1d_to_C_2d_2G.name += '_U_C_1d_to_C_2d_2G' +U_C_1d_to_C_2d = RedistributeIntra(variables=[velo], + source=poisson, target=advec, + mpi_params=mpi_params_UW) +U_C_1d_to_C_2d.name += '_U_C_1d_to_C_2d' +toDevU = DataTransfer(source=topo_C_2d_2G, target=advec_scal, + variables={velo: topo_C_2d_2G}, + run_till=[advec_scal, gradp_op], + mpi_params=mpi_params_S) +toDevU.name += '_ToDev_U' +U_CPU_to_GPU = RedistributeInter(variables=[velo], + parent=main_comm, + source=topo_C_1d, target=topo_C_2d_2G, + source_id=TASK_UW, target_id=TASK_SCALAR, + run_till=[toDevU]) +rhs_GPU_to_CPU = RedistributeInter(variables=[baroclinic_rhs], + parent=main_comm, + 
source=topo_C_2d_2G, target=topo_C_2d_2G, + source_id=TASK_SCALAR, target_id=TASK_UW, + run_till=[baroclinic]) +rhs_GPU_to_CPU.name += '_rhs_GPU_to_CPU' # ok +toHostRHS = DataTransfer(source=topo_C_2d_2G, + target=rhs_GPU_to_CPU, + variables={baroclinic_rhs: topo_C_2d_2G}, + mpi_params=mpi_params_S,) +toHostRHS.name += '_ToHost_RHS' +toDevGradP = DataTransfer(source=topo_C_2d_2G, target=baroclinic_rhs_op, + variables={gradp: topo_C_2d_2G}, + mpi_params=mpi_params_S) +toDevGradP.name += '_ToDev_GradP' + +if IS_OUTPUT: + toHostS = DataTransfer(source=topo_F_2d, target=p_scal_yz, + variables={scal: topo_F_2d}, + mpi_params=mpi_params_S, + freq=out_freq, + run_till=[p_scal_xz, p_scal_yz, p_scal]) + toHostS.name += '_ToHost_S' + +# Operators setup +if box.is_on_task(TASK_SCALAR): + for op in (advec_scal, gradp_op, + baroclinic_rhs_op, filter_scal, diffusion_scal): + op.setup() + if IS_OUTPUT: + for op in (p_scal_yz, p_scal_xz, p_scal, toHostS): + op.setup() + for op in (toDevU, toHostRHS, toDevGradP): + op.setup() +if box.is_on_task(TASK_UW): + for op in (advec, stretch, diffusion, poisson, c, dt_adapt, baroclinic): + op.setup() + if IS_OUTPUT: + for op in (p_velo, energy, maxvelo): + op.setup() + for op in (W_C_2d_to_2G, W_C_2d_2G_to_1d, + W_C_1d_to_C_2d, W_C_1d_to_C_2d_2G, + U_C_1d_to_C_2d, U_C_1d_to_C_2d_2G): + op.setup() +# Wait for all operators setup before setup the intra-comm redistribute +main_comm.Barrier() +U_CPU_to_GPU.setup() +rhs_GPU_to_CPU.setup() + +# Operators list +if IS_OUTPUT: + operators_list = [toDevU, gradp_op, toDevGradP, + advec_scal, diffusion_scal, + toHostS, p_scal_yz, p_scal_xz, p_scal, + baroclinic_rhs_op, filter_scal, + toHostRHS, rhs_GPU_to_CPU, + advec, W_C_2d_to_2G, stretch, baroclinic, + W_C_2d_2G_to_1d, diffusion, poisson, + p_velo, energy, maxvelo, + U_CPU_to_GPU, + U_C_1d_to_C_2d, U_C_1d_to_C_2d_2G, + W_C_1d_to_C_2d, W_C_1d_to_C_2d_2G, dt_adapt] +else: + operators_list = [toDevU, advec_scal, diffusion_scal, + advec, W_C_2d_to_2G, stretch, + W_C_2d_2G_to_1d, diffusion, poisson, + energy, maxvelo, + U_CPU_to_GPU, + U_C_1d_to_C_2d, U_C_1d_to_C_2d_2G, + W_C_1d_to_C_2d, W_C_1d_to_C_2d_2G, dt_adapt] + +# Fields initializations +if box.is_on_task(TASK_SCALAR): + scal.initialize(topo=topo_F_2d) + advec_dirX = advec_scal.advec_dir[0].discrete_op + gpu_scal = advec_dirX.fields_on_grid[0] + gpu_pscal = advec_dirX.fields_on_part[gpu_scal][0] + diffusion_scal.discrete_op.set_field_tmp(gpu_pscal) + if IS_OUTPUT: + p_scal_yz.apply(simu) + p_scal_xz.apply(simu) + p_scal.apply(simu) +if box.is_on_task(TASK_UW): + velo.initialize(topo=topo_C_1d) + c.apply(simu) + poisson.apply(simu) + U_C_1d_to_C_2d.apply(simu) + U_C_1d_to_C_2d_2G.apply(simu) + W_C_1d_to_C_2d.apply(simu) + W_C_1d_to_C_2d_2G.apply(simu) + if IS_OUTPUT: + p_velo.apply(simu) + # p_velo_xy.apply(simu) + # p_velo_xz.apply(simu) +U_CPU_to_GPU.apply(simu) +U_CPU_to_GPU.wait() +if box.is_on_task(TASK_SCALAR): + toDevU.apply(simu) + toDevU.wait() + gradp_op.initialize_velocity() + +simu.initialize() +setup_time = MPI.Wtime() - ctime +main_comm.Barrier() + +# Solve +solve_time = 0. 
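
# A minimal NumPy sketch (not part of the patch; the helper name is
# illustrative) of the scal_to_rho string expression built above, which maps
# the advected scalar (~0 on the light side, ~1 on the heavy side) to a
# density field on the device. Useful for checking the bounds it produces
# with the sample densities 1 and 3:
import numpy as np


def scal_to_rho_py(scal, rho_low=1.0, rho_high=3.0):
    """Python equivalent of the 'density_func' expression handed to
    MultiphaseBaroclinicRHS."""
    sign = 0.5 if rho_low < rho_high else -0.5
    return (abs(rho_low - rho_high) * (sign * np.tanh(100.0 * scal - 50.0) + 0.5)
            + min(rho_low, rho_high))

# scal = 0.0 -> ~1.0, scal = 0.5 -> 2.0, scal = 1.0 -> ~3.0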
+while not simu.isOver:
+    ctime = MPI.Wtime()
+    if main_rank == 0:
+        simu.printState()
+    for op in operators_list:
+        if box.is_on_task(op.task_id()):
+            op.apply(simu)
+    if box.is_on_task(TASK_SCALAR):
+        # Wait gpu operations on scalar
+        advec_scal.advec_dir[0].discreteFields[scal].wait()
+    solve_time += MPI.Wtime() - ctime
+    dt_adapt.wait()
+    # Synchronize threads
+    main_comm.Barrier()
+    simu.advance()
+
+U_CPU_to_GPU.wait()
+main_comm.Barrier()
+nb_ite = simu.currentIteration
+simu.finalize()
+
+
+if IS_OUTPUT:
+    if box.is_on_task(TASK_SCALAR):
+        p_scal_yz.apply(simu)
+        p_scal_xz.apply(simu)
+        p_scal.apply(simu)
+    if box.is_on_task(TASK_UW):
+        p_velo.apply(simu)
+        # p_velo_xy.apply(simu)
+        # p_velo_xz.apply(simu)
+
+for op in operators_list:
+    if box.is_on_task(op.task_id()):
+        op.finalize()
+
+velo.finalize()
+if box.is_on_task(TASK_SCALAR):
+    scal.finalize()
+if box.is_on_task(TASK_UW):
+    vorti.finalize()
+main_comm.Barrier()
diff --git a/Examples/Multiphase/create_random_arrays.py b/Examples/Multiphase/create_random_arrays.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb68fc081cc057b807251de4de8231bb6ecdf601
--- /dev/null
+++ b/Examples/Multiphase/create_random_arrays.py
@@ -0,0 +1,39 @@
+import os
+import numpy as np
+from hysop.constants import HYSOP_REAL, ORDER
+
+
+def random_init(shape, mpi_comm):
+    # Create a folder to store all random arrays
+    d = 'rand_init'
+    if mpi_comm.Get_rank() == 0:
+        if not os.path.exists(d):
+            os.makedirs(d)
+    mpi_comm.Barrier()
+    file_name = "{0}_{1}_{2}".format(*shape)
+    file_name += "_{0}p_{1}.dat".format(mpi_comm.Get_size(),
+                                        mpi_comm.Get_rank())
+    try:
+        randX = np.asarray(
+            np.reshape(np.fromfile(os.path.join(d, 'randX_' + file_name),
+                                   dtype=HYSOP_REAL), shape),
+            dtype=HYSOP_REAL, order=ORDER)
+        randY = np.asarray(
+            np.reshape(np.fromfile(os.path.join(d, 'randY_' + file_name),
+                                   dtype=HYSOP_REAL), shape),
+            dtype=HYSOP_REAL, order=ORDER)
+        randZ = np.asarray(
+            np.reshape(np.fromfile(os.path.join(d, 'randZ_' + file_name),
+                                   dtype=HYSOP_REAL), shape),
+            dtype=HYSOP_REAL, order=ORDER)
+    except IOError:
+        randX = np.asarray(np.random.random(shape),
+                           dtype=HYSOP_REAL, order=ORDER) - 0.5
+        randY = np.asarray(np.random.random(shape),
+                           dtype=HYSOP_REAL, order=ORDER) - 0.5
+        randZ = np.asarray(np.random.random(shape),
+                           dtype=HYSOP_REAL, order=ORDER) - 0.5
+        randX.tofile(os.path.join(d, 'randX_' + file_name))
+        randY.tofile(os.path.join(d, 'randY_' + file_name))
+        randZ.tofile(os.path.join(d, 'randZ_' + file_name))
+    return randX, randY, randZ
diff --git a/HySoP/hysop/gpu/gpu_kernel.py b/HySoP/hysop/gpu/gpu_kernel.py
index 411e141ce0e176fc0c52a10a1fce4f77a3de454d..3508f316f69b905aaf2d7f918544b9c6b10deee8
--- a/HySoP/hysop/gpu/gpu_kernel.py
+++ b/HySoP/hysop/gpu/gpu_kernel.py
@@ -2,10 +2,9 @@
 @file gpu_kernel.py
 """
 from hysop.constants import debug, S_DIR
-#from hysop import __VERBOSE__
+from hysop import __VERBOSE__
 from hysop.gpu import cl, CL_PROFILE
 from hysop.tools.profiler import FProfiler
-__VERBOSE__ = True
 
 class KernelListLauncher(object):
     """
diff --git a/HySoP/hysop/gpu/multi_gpu_particle_advection.py b/HySoP/hysop/gpu/multi_gpu_particle_advection.py
index 53f9803ab65d25ae6fc7f2ca293c3a151553a0c1..3a7c68c77c02ffbfa97fb3f117489a8b3523728f
--- a/HySoP/hysop/gpu/multi_gpu_particle_advection.py
+++ b/HySoP/hysop/gpu/multi_gpu_particle_advection.py
@@ -126,16 +126,16 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
             self.max_cfl_s = int(max_cfl * scale_factor) + 1
             self.max_cfl_v = int(max_cfl) + 1
         else:
-            try:
-                self.max_cfl_s = int(max_velocity[self.direction] * max_dt /
-                                     self._space_step[self.direction]) + 1
-                self.max_cfl_v = int(max_velocity[self.direction] * max_dt /
-                                     self._v_space_step[self.direction]) + 1
-            except TypeError:
-                self.max_cfl_s = int(max_velocity * max_dt /
-                                     self._space_step[self.direction]) + 1
-                self.max_cfl_v = int(max_velocity * max_dt /
-                                     self._v_space_step[self.direction]) + 1
+            try:
+                self.max_cfl_s = int(max_velocity[self.direction] * max_dt /
+                                     self._space_step[self.direction]) + 1
+                self.max_cfl_v = int(max_velocity[self.direction] * max_dt /
+                                     self._v_space_step[self.direction]) + 1
+            except TypeError:
+                self.max_cfl_s = int(max_velocity * max_dt /
+                                     self._space_step[self.direction]) + 1
+                self.max_cfl_v = int(max_velocity * max_dt /
+                                     self._v_space_step[self.direction]) + 1
 
         # Slice
         self._sl_dim = slice(self.dim, 2 * self.dim)
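
The last hunk touches the CFL bound computation in MultiGPUParticleAdvection without changing its logic. As a reference for how those bounds behave, here is a minimal standalone sketch (the function name and sample values are illustrative, not taken from the patch) of the same computation, covering both a per-direction velocity bound, as passed through 'max_velocity' in the example scripts, and a single scalar bound:

def max_cfl_bounds(max_velocity, max_dt, space_step, v_space_step, direction):
    # Cells a particle may cross per step on the scalar grid (max_cfl_s)
    # and on the velocity grid (max_cfl_v).
    try:
        v_bound = max_velocity[direction]   # per-direction bound, e.g. [1.2, 0.7, 0.7]
    except TypeError:
        v_bound = max_velocity              # single scalar bound
    max_cfl_s = int(v_bound * max_dt / space_step[direction]) + 1
    max_cfl_v = int(v_bound * max_dt / v_space_step[direction]) + 1
    return max_cfl_s, max_cfl_v


# Plane-jet settings: dt <= 0.012, |u_x| <= 1.2, scalar step 1/256,
# velocity step 1/128 -> (4, 2).
print(max_cfl_bounds([1.2, 0.7, 0.7], 0.012, [1. / 256] * 3, [1. / 128] * 3, 0))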