Commit a8f7d453 authored by Jean-Matthieu Etancelin

(Multi-)GPU multiresolution filter OK with L2_1. Bug in Linear. Needs merge with the CPU version.

parent 38d0c2d2
-from hysop.constants import debug, np
+from hysop.constants import debug, np, HYSOP_MPI_REAL, ORDERMPI
 import hysop.tools.numpywrappers as npw
-from hysop.operator.discrete.discrete import DiscreteOperator
-#from hysop.operator.discrete.multiresolution_filter import FilterFineToCoarse
 from hysop.gpu.gpu_operator import GPUOperator
-from hysop.operator.discrete.discrete import get_extra_args_from_method
+from hysop.operator.discrete.discrete import DiscreteOperator, get_extra_args_from_method
 from hysop.gpu.gpu_discrete import GPUDiscreteField
 from hysop.gpu.gpu_kernel import KernelLauncher
 from hysop.methods_keys import Remesh
 from hysop.gpu import cl
 from hysop.methods import Rmsh_Linear, L2_1
@@ -36,7 +35,8 @@ class GPUFilterFineToCoarse(DiscreteOperator, GPUOperator):
         resol_in = self._mesh_in.resolution - 2 * gh_in
         resol_out = self._mesh_out.resolution - 2 * self.gh_out
         pts_per_cell = resol_in / resol_out
-        assert np.all(pts_per_cell >= 1), "This operator is fine grid to coarse one"
+        assert np.all(pts_per_cell >= 1), \
+            "This operator goes from a fine grid to a coarse one"
         self.scale_factor = np.prod(self._mesh_in.space_step) / \
             np.prod(self._mesh_out.space_step)
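For intuition, here is a minimal standalone sketch of the coarsening ratio and scale factor computed above, in plain numpy; the 256^3 fine / 64^3 coarse resolutions and the unit box are assumptions for illustration only.

import numpy as np

# Hypothetical example: fine grid 256^3, coarse grid 64^3, unit box.
resol_in = np.array([256, 256, 256])   # fine resolution without ghosts
resol_out = np.array([64, 64, 64])     # coarse resolution without ghosts
pts_per_cell = resol_in / resol_out    # 4 fine points per coarse cell
space_step_in = 1. / resol_in          # fine mesh spacing
space_step_out = 1. / resol_out        # coarse mesh spacing
# Ratio of fine to coarse cell volumes, used to rescale accumulated values:
scale_factor = np.prod(space_step_in) / np.prod(space_step_out)
assert np.all(pts_per_cell >= 1)
print pts_per_cell, scale_factor       # [4 4 4] 0.015625 (= 1/64)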
@@ -70,8 +70,8 @@ class GPUFilterFineToCoarse(DiscreteOperator, GPUOperator):
         self._comm = topo_in.comm
         self._comm_size = self._comm.Get_size()
         self._comm_rank = self._comm.Get_rank()
-        if self._comm_size > 1:
-            raise RuntimeError('MPI version is not yet implemented')
+        # if self._comm_size > 1:
+        #     raise RuntimeError('MPI version is not yet implemented')
         self._mesh_size_in = npw.ones(4, dtype=self.gpu_precision)
         self._mesh_size_in[:self.dim] = \
@@ -94,7 +94,24 @@ class GPUFilterFineToCoarse(DiscreteOperator, GPUOperator):
         self._append_size_constants(self.gh_out, prefix='GHOSTS_OUT')
         self._append_size_constants(pts_per_cell, prefix='PTS_PER_CELL')
         # multi-gpu ghosts buffers for communication
+        if self._comm_size == 1:
+            self._exchange_ghosts = self._exchange_ghosts_local
+        else:
+            self._exchange_ghosts = self._exchange_ghosts_mpi
+        self._gh_from_l = [None] * self.dim
+        self._gh_from_r = [None] * self.dim
+        self._gh_to_l = [None] * self.dim
+        self._gh_to_r = [None] * self.dim
+        # self._mpi_to_l = [None] * self.dim
+        # self._mpi_to_r = [None] * self.dim
+        for d in self._cutdir_list:
+            shape = list(self.field_out.data[0].shape)
+            shape[d] = self.gh_out[d]
+            self._gh_from_l[d] = npw.zeros(tuple(shape))
+            self._gh_from_r[d] = npw.zeros(tuple(shape))
+            self._gh_to_l[d] = npw.zeros(tuple(shape))
+            self._gh_to_r[d] = npw.zeros(tuple(shape))
         # # Ghosts temp arrays for the second version of ghosts exchange
         # self.gh_x = npw.zeros((4 * self.gh_out[0], shape_out[1], shape_out[2]))
@@ -146,27 +163,76 @@ class GPUFilterFineToCoarse(DiscreteOperator, GPUOperator):
         #  - rect-copy of only needed data
         # The first one runs much faster than the second because it maps
         # the device buffer into host pinned memory.
-        # The second version is kept in comments
+        # The second version is kept in comments (for the sequential case)
+        self._exchange_ghosts()
+
+    def _exchange_ghosts_local_d(self, d):
+        s_gh = self.gh_out[d]
+        sl = [slice(None) for _ in xrange(self.dim)]
+        sl_gh = [slice(None) for _ in xrange(self.dim)]
+        sl[d] = slice(1 * s_gh, 2 * s_gh)
+        sl_gh[d] = slice(-1 * s_gh, None)
+        self.field_out.data[0][tuple(sl)] += \
+            self.field_out.data[0][tuple(sl_gh)]
+        sl[d] = slice(-2 * s_gh, -1 * s_gh)
+        sl_gh[d] = slice(0, 1 * s_gh)
+        self.field_out.data[0][tuple(sl)] += \
+            self.field_out.data[0][tuple(sl_gh)]
+
     def _exchange_ghosts_local(self):
         self.field_out.toHost()
         self.field_out.wait()
-        s_gh = self.gh_out[0]
-        self.field_out.data[0][1 * s_gh:2 * s_gh, :, :] += \
-            self.field_out.data[0][-1 * s_gh:, :, :]
-        self.field_out.data[0][-2 * s_gh:-1 * s_gh, :, :] += \
-            self.field_out.data[0][:1 * s_gh, :, :]
-        s_gh = self.gh_out[1]
-        self.field_out.data[0][:, 1 * s_gh:2 * s_gh, :] += \
-            self.field_out.data[0][:, -1 * s_gh:, :]
-        self.field_out.data[0][:, -2 * s_gh:-1 * s_gh, :] += \
-            self.field_out.data[0][:, :1 * s_gh, :]
-        s_gh = self.gh_out[2]
-        self.field_out.data[0][:, :, 1 * s_gh:2 * s_gh] += \
-            self.field_out.data[0][:, :, -1 * s_gh:]
-        self.field_out.data[0][:, :, -2 * s_gh:-1 * s_gh] += \
-            self.field_out.data[0][:, :, :1 * s_gh]
+        for d in xrange(self.dim):
+            self._exchange_ghosts_local_d(d)
         self.field_out.toDevice()
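The loop above replaces the hardcoded X/Y/Z copies: along each direction the ghost layers are folded back onto the opposite in-domain band, i.e. a periodic accumulation. A minimal 1-D sketch in plain numpy, with the ghost width and line length assumed for illustration:

import numpy as np

g = 2                                 # ghost width, assumed
f = np.arange(12, dtype=np.float64)   # one grid line with g ghosts on each side
# Values accumulated in the right ghost layer wrap onto the first
# in-domain band [g, 2g) ...
f[1 * g:2 * g] += f[-1 * g:]
# ... and the left ghost layer wraps onto the last in-domain band.
f[-2 * g:-1 * g] += f[:1 * g]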
+    def _exchange_ghosts_mpi_d(self, d):
+        s_gh = self.gh_out[d]
+        sl_l = [slice(None) for _ in xrange(self.dim)]
+        sl_gh_l = [slice(None) for _ in xrange(self.dim)]
+        sl_r = [slice(None) for _ in xrange(self.dim)]
+        sl_gh_r = [slice(None) for _ in xrange(self.dim)]
+        sl_l[d] = slice(1 * s_gh, 2 * s_gh)
+        sl_gh_r[d] = slice(-1 * s_gh, None)
+        sl_r[d] = slice(-2 * s_gh, -1 * s_gh)
+        sl_gh_l[d] = slice(0, 1 * s_gh)
+        first_cut_dir = self.field_out.topology.cutdir.tolist().index(True)
+        self._gh_to_l[d][...] = self.field_out.data[0][tuple(sl_gh_l)]
+        self._gh_to_r[d][...] = self.field_out.data[0][tuple(sl_gh_r)]
+        R_rk = self.field_out.topology.neighbours[1, d - first_cut_dir]
+        L_rk = self.field_out.topology.neighbours[0, d - first_cut_dir]
+        recv_r = self._comm.Irecv(
+            [self._gh_from_r[d], self._gh_from_r[d].size,
+             HYSOP_MPI_REAL],
+            source=R_rk, tag=1234 + R_rk + 19 * d)
+        recv_l = self._comm.Irecv(
+            [self._gh_from_l[d], self._gh_from_l[d].size,
+             HYSOP_MPI_REAL],
+            source=L_rk, tag=4321 + L_rk + 17 * d)
+        send_l = self._comm.Issend(
+            [self._gh_to_l[d], self._gh_to_l[d].size, HYSOP_MPI_REAL],
+            dest=L_rk, tag=1234 + self._comm_rank + 19 * d)
+        send_r = self._comm.Issend(
+            [self._gh_to_r[d], self._gh_to_r[d].size, HYSOP_MPI_REAL],
+            dest=R_rk, tag=4321 + self._comm_rank + 17 * d)
+        send_r.wait()
+        recv_l.wait()
+        self.field_out.data[0][tuple(sl_l)] += self._gh_from_l[d]
+        send_l.wait()
+        recv_r.wait()
+        self.field_out.data[0][tuple(sl_r)] += self._gh_from_r[d]
+
+    def _exchange_ghosts_mpi(self):
+        self.field_out.toHost()
+        self.field_out.wait()
+        for d in xrange(self.dim):
+            if d in self._cutdir_list:
+                self._exchange_ghosts_mpi_d(d)
+            else:
+                self._exchange_ghosts_local_d(d)
+        self.field_out.toDevice()
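The MPI path posts both receives before the matching Issend calls, and the tag scheme pairs each send with the receive posted on the neighbouring rank: a send is tagged with the sender's rank, a receive with the expected source's rank. A self-contained mpi4py sketch of the same handshake; the periodic ring of ranks, the flat 4-element buffers, and the dropped per-direction tag offsets are all assumptions for illustration:

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()
L_rk = (rank - 1) % size                # left neighbour on a periodic ring
R_rk = (rank + 1) % size                # right neighbour

gh_to_l = np.full(4, rank, dtype='d')   # ghost band to send left
gh_to_r = np.full(4, rank, dtype='d')   # ghost band to send right
gh_from_l = np.empty(4, dtype='d')
gh_from_r = np.empty(4, dtype='d')

# Receives are posted first; tags carry the rank of the sending side so
# that each Issend below matches exactly one Irecv on the neighbour.
recv_r = comm.Irecv([gh_from_r, MPI.DOUBLE], source=R_rk, tag=1234 + R_rk)
recv_l = comm.Irecv([gh_from_l, MPI.DOUBLE], source=L_rk, tag=4321 + L_rk)
send_l = comm.Issend([gh_to_l, MPI.DOUBLE], dest=L_rk, tag=1234 + rank)
send_r = comm.Issend([gh_to_r, MPI.DOUBLE], dest=R_rk, tag=4321 + rank)
MPI.Request.Waitall([recv_r, recv_l, send_l, send_r])
# gh_from_l / gh_from_r would now be added onto the in-domain bands.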
         # # Get ghosts values and in-domain layer
         # # X-direction
         # s_gh = self.gh_out[0]
@@ -57,13 +57,13 @@ def test_filter_linear():
     op_py.apply(simu)
     f_out.toHost()
     op.discrete_op.cl_env.queue.finish()
-    # e = np.abs(valid[0][topo_coarse.mesh.iCompute] -
-    #            f_out[0][topo_coarse.mesh.iCompute])
-    # print np.max(e), np.where(e>1e-6)[0].shape
-    print np.allclose(valid[0][topo_coarse.mesh.iCompute],
+    e = np.abs(valid[0][topo_coarse.mesh.iCompute] -
+               f_out[0][topo_coarse.mesh.iCompute])
+    print np.max(e), np.where(e>1e-6)[0].shape
+    assert np.allclose(valid[0][topo_coarse.mesh.iCompute],
                        f_out[0][topo_coarse.mesh.iCompute])
-    op.profiler.summarize()
-    print op.profiler
+    # op.profiler.summarize()
+    # print op.profiler
@@ -93,7 +93,7 @@ def test_filter_L2_1():
                  if not t is topo_coarse][0]
     f.initialize(topo=topo_fine)
     #f_py.initialize(topo=topo_fine)
-    #f_in = f.discreteFields[topo_fine]
+    f_in = f.discreteFields[topo_fine]
     f_out = f.discreteFields[topo_coarse]
     #valid = f_py.discreteFields[topo_coarse]
     valid = [npw.zeros(f_out[0].shape), ]
@@ -107,14 +107,13 @@ def test_filter_L2_1():
     op.discrete_op.cl_env.queue.finish()
-    # e = np.abs(valid[0][topo_coarse.mesh.iCompute] -
-    #            f_out[0][topo_coarse.mesh.iCompute])
-    # print np.max(e), np.where(e>1e-6)[0].shape
-    print np.allclose(valid[0][topo_coarse.mesh.iCompute],
+    #print np.max(e), np.where(e>1e-6)#[0].shape
+    assert np.allclose(valid[0][topo_coarse.mesh.iCompute],
                        f_out[0][topo_coarse.mesh.iCompute])
-    op.profiler.summarize()
-    print op.profiler
+    # op.profiler.summarize()
+    # print op.profiler
 if __name__ == '__main__':
-    test_filter_linear()
+    #test_filter_linear()
     test_filter_L2_1()
-from hysop.constants import debug, np
+from hysop.constants import debug, np, HYSOP_MPI_REAL, ORDERMPI
 from hysop.operator.discrete.discrete import DiscreteOperator
 from hysop.tools.profiler import profile
 import hysop.tools.numpywrappers as npw
@@ -17,24 +17,42 @@ class FilterFineToCoarse(DiscreteOperator):
         self.output = [self.field_out]
         self._mesh_in = self.field_in[0].topology.mesh
         self._mesh_out = self.field_out[0].topology.mesh
-        gh_out = self.field_out[0].topology.ghosts()
+        self.gh_out = self.field_out[0].topology.ghosts()
         gh_in = self.field_in[0].topology.ghosts()
-        #print gh_in, gh_out
+        #print gh_in, self.gh_out
         resol_in = self._mesh_in.resolution - 2 * gh_in
-        resol_out = self._mesh_out.resolution - 2 * gh_out
+        resol_out = self._mesh_out.resolution - 2 * self.gh_out
         pts_per_cell = resol_in / resol_out
         assert np.all(pts_per_cell >= 1), \
             "This operator goes from a fine grid to a coarse one"
         self.scale_factor = np.prod(self._mesh_in.space_step) / \
             np.prod(self._mesh_out.space_step)
         #print pts_per_cell, self.scale_factor
         #print self._mesh_in.coords
+        # multi-gpu ghosts buffers for communication
+        self._cutdir_list = np.where(self.field_in[0].topology.cutdir)[0].tolist()
+        self._comm = self.field_in[0].topology.comm
+        self._comm_size = self._comm.Get_size()
+        self._comm_rank = self._comm.Get_rank()
+        if self._comm_size == 1:
+            self._exchange_ghosts = self._exchange_ghosts_local
+        else:
+            self._exchange_ghosts = self._exchange_ghosts_mpi
+        self._gh_from_l = [None] * self._dim
+        self._gh_from_r = [None] * self._dim
+        self._gh_to_l = [None] * self._dim
+        self._gh_to_r = [None] * self._dim
+        for d in self._cutdir_list:
+            shape = list(self.field_out[0].data[0].shape)
+            shape[d] = self.gh_out[d]
+            self._gh_from_l[d] = npw.zeros(tuple(shape))
+            self._gh_from_r[d] = npw.zeros(tuple(shape))
+            self._gh_to_l[d] = npw.zeros(tuple(shape))
+            self._gh_to_r[d] = npw.zeros(tuple(shape))
         in_coords = (self._mesh_in.coords - self._mesh_in.origin) / \
             self._mesh_out.space_step
         self.floor_coords = np.array([np.floor(c) for c in in_coords])
         self.dist_coords = in_coords - self.floor_coords
         self._work_weight = np.array([npw.zeros_like(c) for c in self.dist_coords])
         #print type(self.dist_coords), self.dist_coords.shape, self.dist_coords[0].shape
         #print type(self._work_weight), self._work_weight.shape, self._work_weight[0].shape
         # Slices to serialize concurrent access in the coarse grid:
         # several points of the fine grid lie in the same coarse cell,
         # and the serialization avoids concurrent access.
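A sketch of that serialization idea in plain numpy, 1-D, with 4 fine points per coarse cell assumed: fine points are visited in strided groups so that within one group no two points target the same coarse cell, and each group becomes a safe vectorized accumulation.

import numpy as np

ppc = 4                                  # fine points per coarse cell, assumed
fine = np.arange(16, dtype=np.float64)   # 16 fine values -> 4 coarse cells
coarse = np.zeros(4)
for offset in xrange(ppc):               # one pass per intra-cell offset
    # fine[offset::ppc] picks exactly one point of every coarse cell,
    # so this strided slice never writes twice to the same coarse index
    # (interpolation weights are omitted here for brevity).
    coarse += fine[offset::ppc]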
@@ -66,12 +84,12 @@ class FilterFineToCoarse(DiscreteOperator):
             self._rmsh_method = Rmsh_Linear()
         if isinstance(self._rmsh_method, Rmsh_Linear):
             # Linear interpolation
-            assert np.all(gh_out >= 1), "Output data must have at least one " \
-                " ghosts in all directions (" + str(gh_out) + " given)"
+            assert np.all(self.gh_out >= 1), "Output data must have at least " \
+                "one ghost in all directions (" + str(self.gh_out) + " given)"
         elif isinstance(self._rmsh_method, L2_1):
             # L2_1 interpolation
-            assert np.all(gh_out >= 2), "Output data must have at least one " \
-                " ghosts in all directions (" + str(gh_out) + " given)"
+            assert np.all(self.gh_out >= 2), "Output data must have at least " \
+                "two ghosts in all directions (" + str(self.gh_out) + " given)"
         else:
             raise ValueError("The multiresolution filter is implemented for " +
                              "Linear or L2_1 interpolation ("
@@ -102,66 +120,66 @@ class FilterFineToCoarse(DiscreteOperator):
         self._rwork = npw.zeros(resol_out)
         self._bc_from_ghosts = []
         self._bc_to_compute = []
-        if gh_out[0] > 0:
+        if self.gh_out[0] > 0:
             # Right X-dir ghosts
             self._bc_from_ghosts.append((
                 slice(self._mesh_out.iCompute[0].stop,
-                      self._mesh_out.iCompute[0].stop + gh_out[0], None),
+                      self._mesh_out.iCompute[0].stop + self.gh_out[0], None),
                 slice(None), slice(None)))
             self._bc_to_compute.append((
                 slice(self._mesh_out.iCompute[0].start,
-                      self._mesh_out.iCompute[0].start + gh_out[0], None),
+                      self._mesh_out.iCompute[0].start + self.gh_out[0], None),
                 slice(None), slice(None)))
             # Left X-dir ghosts
             self._bc_from_ghosts.append((
-                slice(self._mesh_out.iCompute[0].start - gh_out[0],
+                slice(self._mesh_out.iCompute[0].start - self.gh_out[0],
                       self._mesh_out.iCompute[0].start, None),
                 slice(None), slice(None)))
             self._bc_to_compute.append((
-                slice(self._mesh_out.iCompute[0].stop - gh_out[0],
+                slice(self._mesh_out.iCompute[0].stop - self.gh_out[0],
                       self._mesh_out.iCompute[0].stop, None),
                 slice(None), slice(None)))
-        if gh_out[1] > 0:
+        if self.gh_out[1] > 0:
             # Right Y-dir ghosts
             self._bc_from_ghosts.append((
                 slice(None),
                 slice(self._mesh_out.iCompute[1].stop,
-                      self._mesh_out.iCompute[1].stop + gh_out[1], None),
+                      self._mesh_out.iCompute[1].stop + self.gh_out[1], None),
                 slice(None)))
             self._bc_to_compute.append((
                 slice(None),
                 slice(self._mesh_out.iCompute[1].start,
-                      self._mesh_out.iCompute[1].start + gh_out[1], None),
+                      self._mesh_out.iCompute[1].start + self.gh_out[1], None),
                 slice(None)))
             # Left Y-dir ghosts
             self._bc_from_ghosts.append((
                 slice(None),
-                slice(self._mesh_out.iCompute[1].start - gh_out[1],
+                slice(self._mesh_out.iCompute[1].start - self.gh_out[1],
                       self._mesh_out.iCompute[1].start, None),
                 slice(None)))
             self._bc_to_compute.append((
                 slice(None),
-                slice(self._mesh_out.iCompute[1].stop - gh_out[1],
+                slice(self._mesh_out.iCompute[1].stop - self.gh_out[1],
                       self._mesh_out.iCompute[1].stop, None),
                 slice(None)))
-        if gh_out[2] > 0:
+        if self.gh_out[2] > 0:
             # Right Z-dir ghosts
             self._bc_from_ghosts.append((
                 slice(None), slice(None),
                 slice(self._mesh_out.iCompute[2].stop,
-                      self._mesh_out.iCompute[2].stop + gh_out[2], None)))
+                      self._mesh_out.iCompute[2].stop + self.gh_out[2], None)))
             self._bc_to_compute.append((
                 slice(None), slice(None),
                 slice(self._mesh_out.iCompute[2].start,
-                      self._mesh_out.iCompute[2].start + gh_out[2], None)))
+                      self._mesh_out.iCompute[2].start + self.gh_out[2], None)))
             # Left Z-dir ghosts
             self._bc_from_ghosts.append((
                 slice(None), slice(None),
-                slice(self._mesh_out.iCompute[2].start - gh_out[2],
+                slice(self._mesh_out.iCompute[2].start - self.gh_out[2],
                       self._mesh_out.iCompute[2].start, None)))
             self._bc_to_compute.append((
                 slice(None), slice(None),
-                slice(self._mesh_out.iCompute[2].stop - gh_out[2],
+                slice(self._mesh_out.iCompute[2].stop - self.gh_out[2],
                       self._mesh_out.iCompute[2].stop, None)))
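To make the pairing concrete: a small plain-numpy sketch, 1-D and with ghost width and domain size assumed, of how one from-ghosts slice lines up with its to-compute counterpart for the right boundary.

import numpy as np

g = 2                                   # ghost width, assumed
start, stop = g, g + 10                 # iCompute bounds: 10 in-domain points
data = np.zeros(10 + 2 * g)
data[stop:stop + g] = 1.                # contributions landed in right ghosts
bc_from = slice(stop, stop + g, None)   # from-ghosts slice, as built above
bc_to = slice(start, start + g, None)   # to-compute slice it accumulates into
data[bc_to] += data[bc_from]            # periodic fold of the ghost layer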
     @debug
@@ -222,3 +240,65 @@ class FilterFineToCoarse(DiscreteOperator):
                 #print fr, to
                 #print v_out[d][fr].shape, v_out[d][to].shape
                 v_out[d][to] += v_out[d][fr]
+        self._exchange_ghosts()
+
+    def _exchange_ghosts_local_d(self, d):
+        s_gh = self.gh_out[d]
+        sl = [slice(None) for _ in xrange(self._dim)]
+        sl_gh = [slice(None) for _ in xrange(self._dim)]
+        sl[d] = slice(1 * s_gh, 2 * s_gh)
+        sl_gh[d] = slice(-1 * s_gh, None)
+        for v_out in self.field_out:
+            v_out.data[0][tuple(sl)] += v_out.data[0][tuple(sl_gh)]
+        sl[d] = slice(-2 * s_gh, -1 * s_gh)
+        sl_gh[d] = slice(0, 1 * s_gh)
+        for v_out in self.field_out:
+            v_out.data[0][tuple(sl)] += v_out.data[0][tuple(sl_gh)]
+
+    def _exchange_ghosts_local(self):
+        for d in xrange(self._dim):
+            self._exchange_ghosts_local_d(d)
+
+    def _exchange_ghosts_mpi_d(self, d):
+        s_gh = self.gh_out[d]
+        sl_l = [slice(None) for _ in xrange(self._dim)]
+        sl_gh_l = [slice(None) for _ in xrange(self._dim)]
+        sl_r = [slice(None) for _ in xrange(self._dim)]
+        sl_gh_r = [slice(None) for _ in xrange(self._dim)]
+        sl_l[d] = slice(1 * s_gh, 2 * s_gh)
+        sl_gh_r[d] = slice(-1 * s_gh, None)
+        sl_r[d] = slice(-2 * s_gh, -1 * s_gh)
+        sl_gh_l[d] = slice(0, 1 * s_gh)
+        for v_out in self.field_out:
+            first_cut_dir = v_out.topology.cutdir.tolist().index(True)
+            self._gh_to_l[d][...] = v_out.data[0][tuple(sl_gh_l)]
+            self._gh_to_r[d][...] = v_out.data[0][tuple(sl_gh_r)]
+            R_rk = v_out.topology.neighbours[1, d - first_cut_dir]
+            L_rk = v_out.topology.neighbours[0, d - first_cut_dir]
+            recv_r = self._comm.Irecv(
+                [self._gh_from_r[d], self._gh_from_r[d].size,
+                 HYSOP_MPI_REAL],
+                source=R_rk, tag=1234 + R_rk + 19 * d)
+            recv_l = self._comm.Irecv(
+                [self._gh_from_l[d], self._gh_from_l[d].size,
+                 HYSOP_MPI_REAL],
+                source=L_rk, tag=4321 + L_rk + 17 * d)
+            send_l = self._comm.Issend(
+                [self._gh_to_l[d], self._gh_to_l[d].size, HYSOP_MPI_REAL],
+                dest=L_rk, tag=1234 + self._comm_rank + 19 * d)
+            send_r = self._comm.Issend(
+                [self._gh_to_r[d], self._gh_to_r[d].size, HYSOP_MPI_REAL],
+                dest=R_rk, tag=4321 + self._comm_rank + 17 * d)
+            send_r.wait()
+            recv_l.wait()
+            v_out.data[0][tuple(sl_l)] += self._gh_from_l[d]
+            send_l.wait()
+            recv_r.wait()
+            v_out.data[0][tuple(sl_r)] += self._gh_from_r[d]
+
+    def _exchange_ghosts_mpi(self):
+        for d in xrange(self._dim):
+            if d in self._cutdir_list:
+                self._exchange_ghosts_mpi_d(d)
+            else:
+                self._exchange_ghosts_local_d(d)