Skip to content
Snippets Groups Projects
Commit 9d094637 authored by Jean-Matthieu Etancelin's avatar Jean-Matthieu Etancelin
Browse files

Cleaning up. Need gpu version with L21 scheme

parent c32b73d5
No related branches found
No related tags found
No related merge requests found
......@@ -27,8 +27,6 @@ __kernel void coarse_to_fine_filter(__global const float* scal_in,
__local float line[WG*PTS_PER_CELL_X];
__local float result[NB_OUT_X][L_STENCIL][L_STENCIL];
__private float p_res[L_STENCIL][L_STENCIL][L_STENCIL];
///// IDEA: compute the points of the output cell handled by a work-item in registers, then add them to local memory.
///// Use a private array of size [L_STENCIL][L_STENCIL][L_STENCIL].
// Fill local arrays
// Output data
......
......@@ -131,6 +131,7 @@ class GPUFilterFineToCoarse(DiscreteOperator, GPUOperator):
self.field_out.events.append(evts[-1])
# Get ghosts values and in-domain layer
# X-direction
s_gh = self.gh_out[0]
get_gh_xl = cl.enqueue_copy(
self.cl_env.queue,
self.gh_x, self.field_out.gpu_data[0],
......@@ -138,47 +139,49 @@ class GPUFilterFineToCoarse(DiscreteOperator, GPUOperator):
buffer_origin=(0, 0, 0),
host_pitches=self._pitches_host_x,
buffer_pitches=self._pitches_buff,
region=(self.gh_x[:2, 0, 0].nbytes,
region=(self.gh_x[:2 * s_gh, 0, 0].nbytes,
self.gh_x.shape[1],
self.gh_x.shape[2]),
wait_for=evts)
get_gh_xr = cl.enqueue_copy(
self.cl_env.queue,
self.gh_x, self.field_out.gpu_data[0],
host_origin=(self.gh_x[:2, 0, 0].nbytes, 0, 0),
host_origin=(self.gh_x[:2 * s_gh, 0, 0].nbytes, 0, 0),
buffer_origin=(self.field_out.data[0][:, 0, 0].nbytes -
self.gh_x[:2, 0, 0].nbytes, 0, 0),
self.gh_x[:2 * s_gh, 0, 0].nbytes, 0, 0),
host_pitches=self._pitches_host_x,
buffer_pitches=self._pitches_buff,
region=(self.gh_x[:2, 0, 0].nbytes,
region=(self.gh_x[:2 * s_gh, 0, 0].nbytes,
self.gh_x.shape[1],
self.gh_x.shape[2]),
wait_for=evts)
get_gh_xl.wait()
get_gh_xr.wait()
# Add ghosts contributions in domain layer
self.gh_x[2, :, :] += self.gh_x[0, :, :]
self.gh_x[1, :, :] += self.gh_x[-1, :, :]
self.gh_x[2 * s_gh:3 * s_gh, :, :] += \
self.gh_x[0 * s_gh:1 * s_gh, :, :]
self.gh_x[1 * s_gh:2 * s_gh, :, :] += \
self.gh_x[3 * s_gh:4 * s_gh, :, :]
set_gh_xl = cl.enqueue_copy(
self.cl_env.queue,
self.field_out.gpu_data[0], self.gh_x,
host_origin=(self.gh_x[:1, 0, 0].nbytes, 0, 0),
buffer_origin=(self.gh_x[:1, 0, 0].nbytes, 0, 0),
host_origin=(self.gh_x[:1 * s_gh, 0, 0].nbytes, 0, 0),
buffer_origin=(self.gh_x[:1 * s_gh, 0, 0].nbytes, 0, 0),
host_pitches=self._pitches_host_x,
buffer_pitches=self._pitches_buff,
region=(self.gh_x[:1, 0, 0].nbytes,
region=(self.gh_x[:1 * s_gh, 0, 0].nbytes,
self.gh_x.shape[1],
self.gh_x.shape[2]),
wait_for=evts)
set_gh_xr = cl.enqueue_copy(
self.cl_env.queue,
self.field_out.gpu_data[0], self.gh_x,
host_origin=(self.gh_x[:3, 0, 0].nbytes, 0, 0),
self.field_out.gpu_data[0], self.gh_x,
host_origin=(self.gh_x[:3 * s_gh, 0, 0].nbytes, 0, 0),
buffer_origin=(self.field_out.data[0][:, 0, 0].nbytes -
self.gh_x[:1, 0, 0].nbytes, 0, 0),
self.gh_x[:1 * s_gh, 0, 0].nbytes, 0, 0),
host_pitches=self._pitches_host_x,
buffer_pitches=self._pitches_buff,
region=(self.gh_x[:1, 0, 0].nbytes,
region=(self.gh_x[:1 * s_gh, 0, 0].nbytes,
self.gh_x.shape[1],
self.gh_x.shape[2]),
wait_for=evts)
......@@ -186,6 +189,7 @@ class GPUFilterFineToCoarse(DiscreteOperator, GPUOperator):
set_gh_xr.wait()
# Y-direction
s_gh = self.gh_out[1]
get_gh_yl = cl.enqueue_copy(
self.cl_env.queue,
self.gh_y, self.field_out.gpu_data[0],
......@@ -193,44 +197,47 @@ class GPUFilterFineToCoarse(DiscreteOperator, GPUOperator):
buffer_origin=(0, 0, 0),
host_pitches=self._pitches_host_y,
buffer_pitches=self._pitches_buff,
region=(self.gh_y[:, 0, 0].nbytes, 2, self.gh_y.shape[2]),
region=(self.gh_y[:, 0, 0].nbytes, 2 * s_gh, self.gh_y.shape[2]),
wait_for=evts)
get_gh_yr = cl.enqueue_copy(
self.cl_env.queue,
self.gh_y, self.field_out.gpu_data[0],
host_origin=(0, 2, 0),
buffer_origin=(0, self.field_out.data[0].shape[1] - 2, 0),
host_origin=(0, 2 * s_gh, 0),
buffer_origin=(0, self.field_out.data[0].shape[1] - 2 * s_gh, 0),
host_pitches=self._pitches_host_y,
buffer_pitches=self._pitches_buff,
region=(self.gh_y[:, 0, 0].nbytes, 2, self.gh_y.shape[2]),
region=(self.gh_y[:, 0, 0].nbytes, 2 * s_gh, self.gh_y.shape[2]),
wait_for=evts)
get_gh_yl.wait()
get_gh_yr.wait()
# Add ghosts contributions in domain layer
self.gh_y[:, 2, :] += self.gh_y[:, 0, :]
self.gh_y[:, 1, :] += self.gh_y[:, -1, :]
self.gh_y[:, 2 * s_gh:3 * s_gh, :] += \
self.gh_y[:, 0 * s_gh:1 * s_gh, :]
self.gh_y[:, 1 * s_gh:2 * s_gh, :] += \
self.gh_y[:, 3 * s_gh:4 * s_gh, :]
set_gh_yl = cl.enqueue_copy(
self.cl_env.queue,
self.field_out.gpu_data[0], self.gh_y,
host_origin=(0, 1, 0),
buffer_origin=(0, 1, 0),
host_origin=(0, 1 * s_gh, 0),
buffer_origin=(0, 1 * s_gh, 0),
host_pitches=self._pitches_host_y,
buffer_pitches=self._pitches_buff,
region=(self.gh_y[:, 0, 0].nbytes, 1, self.gh_y.shape[2]),
region=(self.gh_y[:, 0, 0].nbytes, 1 * s_gh, self.gh_y.shape[2]),
wait_for=evts)
set_gh_yr = cl.enqueue_copy(
self.cl_env.queue,
self.field_out.gpu_data[0], self.gh_y,
host_origin=(0, 3, 0),
buffer_origin=(0, self.field_out.data[0].shape[1] - 1, 0),
host_origin=(0, 3 * s_gh, 0),
buffer_origin=(0, self.field_out.data[0].shape[1] - 1 * s_gh, 0),
host_pitches=self._pitches_host_y,
buffer_pitches=self._pitches_buff,
region=(self.gh_y[:, 0, 0].nbytes, 1, self.gh_y.shape[2]),
region=(self.gh_y[:, 0, 0].nbytes, 1 * s_gh, self.gh_y.shape[2]),
wait_for=evts)
set_gh_yl.wait()
set_gh_yr.wait()
# Z-direction
s_gh = self.gh_out[2]
get_gh_zl = cl.enqueue_copy(
self.cl_env.queue,
self.gh_z, self.field_out.gpu_data[0],
......@@ -238,44 +245,45 @@ class GPUFilterFineToCoarse(DiscreteOperator, GPUOperator):
buffer_origin=(0, 0, 0),
host_pitches=self._pitches_host_z,
buffer_pitches=self._pitches_buff,
region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 2),
region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 2 * s_gh),
wait_for=evts)
get_gh_zr = cl.enqueue_copy(
self.cl_env.queue,
self.gh_z, self.field_out.gpu_data[0],
host_origin=(0, 0, 2),
buffer_origin=(0, 0, self.field_out.data[0].shape[2] - 2),
host_origin=(0, 0, 2 * s_gh),
buffer_origin=(0, 0, self.field_out.data[0].shape[2] - 2 * s_gh),
host_pitches=self._pitches_host_z,
buffer_pitches=self._pitches_buff,
region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 2),
region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 2 * s_gh),
wait_for=evts)
get_gh_zl.wait()
get_gh_zr.wait()
# Add ghosts contributions in domain layer
self.gh_z[:, :, 2] += self.gh_z[:, :, 0]
self.gh_z[:, :, 1] += self.gh_z[:, :, -1]
self.gh_z[:, :, 2 * s_gh:3 * s_gh] += \
self.gh_z[:, :, 0 * s_gh:1 * s_gh]
self.gh_z[:, :, 1 * s_gh:2 * s_gh] += \
self.gh_z[:, :, 3 * s_gh:4 * s_gh]
set_gh_zl = cl.enqueue_copy(
self.cl_env.queue,
self.field_out.gpu_data[0], self.gh_z,
host_origin=(0, 0, 1),
buffer_origin=(0, 0, 1),
host_origin=(0, 0, 1 * s_gh),
buffer_origin=(0, 0, 1 * s_gh),
host_pitches=self._pitches_host_z,
buffer_pitches=self._pitches_buff,
region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 1),
region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 1 * s_gh),
wait_for=evts)
set_gh_zr = cl.enqueue_copy(
self.cl_env.queue,
self.field_out.gpu_data[0], self.gh_z,
host_origin=(0, 0, 3),
buffer_origin=(0, 0, self.field_out.data[0].shape[2] - 1),
host_origin=(0, 0, 3 * s_gh),
buffer_origin=(0, 0, self.field_out.data[0].shape[2] - 1 * s_gh),
host_pitches=self._pitches_host_z,
buffer_pitches=self._pitches_buff,
region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 1),
region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 1 * s_gh),
wait_for=evts)
set_gh_zl.wait()
set_gh_zr.wait()
def get_profiling_info(self):
for p in self.fine_to_coarse.profile:
self.profiler += p
......@@ -27,7 +27,7 @@ def test_filter():
f = Field(box, formula=func)
f_py = Field(box, formula=func)
d_fine = Discretization([513, 513, 513])
d_coarse = Discretization([257, 257, 257], ghosts=[1, 1, 1])
d_coarse = Discretization([257, 257, 257], ghosts=[2, 2, 2])
op = MultiresolutionFilter(d_in=d_fine, d_out=d_coarse,
variables={f: d_coarse},
method={Remesh: Rmsh_Linear,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment