Ok multiresolution filter and test

38d0c2d2 · Jean-Matthieu Etancelin · cf5c5d17 · 38d0c2d2 · 38d0c2d2
Commit 38d0c2d2 authored 10 years ago by Jean-Matthieu Etancelin
--- a/HySoP/hysop/gpu/multiresolution_filter.py
+++ b/HySoP/hysop/gpu/multiresolution_filter.py
@@ -94,19 +94,21 @@ class GPUFilterFineToCoarse(DiscreteOperator, GPUOperator):
        self._append_size_constants(self.gh_out, prefix='GHOSTS_OUT')
        self._append_size_constants(pts_per_cell, prefix='PTS_PER_CELL')
-        # Ghosts temp arrays
-        self.gh_x = npw.zeros((4 * self.gh_out[0], shape_out[1], shape_out[2]))
-        self.gh_y = npw.zeros((shape_out[0], 4 * self.gh_out[1], shape_out[2]))
+        # # Ghosts temp arrays for the second version of ghosts exchange
-        self.gh_z = npw.zeros((shape_out[0], shape_out[1], 4 * self.gh_out[2]))
+        # self.gh_x = npw.zeros((4 * self.gh_out[0], shape_out[1], shape_out[2]))
-        print self.gh_x.shape, self.gh_y.shape, self.gh_z.shape
+        # self.gh_y = npw.zeros((shape_out[0], 4 * self.gh_out[1], shape_out[2]))
-        self._pitches_host_x = (int(self.gh_x[:, 0, 0].nbytes),
+        # self.gh_z = npw.zeros((shape_out[0], shape_out[1], 4 * self.gh_out[2]))
-                                int(self.gh_x[:, :, 0].nbytes))
+        # print self.gh_x.shape, self.gh_y.shape, self.gh_z.shape
-        self._pitches_host_y = (int(self.gh_y[:, 0, 0].nbytes),
+        # self._pitches_host_x = (int(self.gh_x[:, 0, 0].nbytes),
-                                int(self.gh_y[:, :, 0].nbytes))
+        #                         int(self.gh_x[:, :, 0].nbytes))
-        self._pitches_host_z = (int(self.gh_z[:, 0, 0].nbytes),
+        # self._pitches_host_y = (int(self.gh_y[:, 0, 0].nbytes),
-                                int(self.gh_z[:, :, 0].nbytes))
+        #                         int(self.gh_y[:, :, 0].nbytes))
-        self._pitches_buff = (int(self.field_out.data[0][:, 0, 0].nbytes),
+        # self._pitches_host_z = (int(self.gh_z[:, 0, 0].nbytes),
-                              int(self.field_out.data[0][:, :, 0].nbytes))
+        #                         int(self.gh_z[:, :, 0].nbytes))
+        # self._pitches_buff = (int(self.field_out.data[0][:, 0, 0].nbytes),
+        #                       int(self.field_out.data[0][:, :, 0].nbytes))
        src, vec, f_space = \
            self._kernel_cfg['fine_to_coarse_filter']
@@ -137,160 +139,188 @@ class GPUFilterFineToCoarse(DiscreteOperator, GPUOperator):
                                                np.int32(iy), np.int32(iz),
                                                wait_for=evts))
                self.field_out.events.append(evts[-1])
-        # Get ghosts values and in-domain layer
+        # Ghost values must be exchanged either on-process or through MPI
-        # X-direction
+        # communications. Values must be moved to host.
-        s_gh = self.gh_out[0]
+        # We developed 2 versions:
-        get_gh_xl = cl.enqueue_copy(
+        #  - copy of the entire field data
-            self.cl_env.queue,
+        #  - rect-copy of only needed data
-            self.gh_x, self.field_out.gpu_data[0],
+        # The first one runs much faster than the second because of
-            host_origin=(0, 0, 0),
+        # the use of the mapping of device buffer in host pinned memory.
-            buffer_origin=(0, 0, 0),
+        # The second version is kept below, commented out.
-            host_pitches=self._pitches_host_x,
-            buffer_pitches=self._pitches_buff,
-            region=(self.gh_x[:2 * s_gh, 0, 0].nbytes,
-                    self.gh_x.shape[1],
-                    self.gh_x.shape[2]),
-            wait_for=evts)
-        get_gh_xr = cl.enqueue_copy(
-            self.cl_env.queue,
-            self.gh_x, self.field_out.gpu_data[0],
-            host_origin=(self.gh_x[:2 * s_gh, 0, 0].nbytes, 0, 0),
-            buffer_origin=(self.field_out.data[0][:, 0, 0].nbytes -
-                           self.gh_x[:2 * s_gh, 0, 0].nbytes, 0, 0),
-            host_pitches=self._pitches_host_x,
-            buffer_pitches=self._pitches_buff,
-            region=(self.gh_x[:2 * s_gh, 0, 0].nbytes,
-                    self.gh_x.shape[1],
-                    self.gh_x.shape[2]),
-            wait_for=evts)
-        get_gh_xl.wait()
-        get_gh_xr.wait()
-        # Add ghosts contributions in domain layer
-        self.gh_x[2 * s_gh:3 * s_gh, :, :] += \
-            self.gh_x[0 * s_gh:1 * s_gh, :, :]
-        self.gh_x[1 * s_gh:2 * s_gh, :, :] += \
-            self.gh_x[3 * s_gh:4 * s_gh, :, :]
-        set_gh_xl = cl.enqueue_copy(
-            self.cl_env.queue,
-            self.field_out.gpu_data[0], self.gh_x,
-            host_origin=(self.gh_x[:1 * s_gh, 0, 0].nbytes, 0, 0),
-            buffer_origin=(self.gh_x[:1 * s_gh, 0, 0].nbytes, 0, 0),
-            host_pitches=self._pitches_host_x,
-            buffer_pitches=self._pitches_buff,
-            region=(self.gh_x[:1 * s_gh, 0, 0].nbytes,
-                    self.gh_x.shape[1],
-                    self.gh_x.shape[2]),
-            wait_for=evts)
-        set_gh_xr = cl.enqueue_copy(
-            self.cl_env.queue,
-            self.field_out.gpu_data[0], self.gh_x,
-            host_origin=(self.gh_x[:2 * s_gh, 0, 0].nbytes, 0, 0),
-            buffer_origin=(self.field_out.data[0][:, 0, 0].nbytes -
-                           self.gh_x[:2 * s_gh, 0, 0].nbytes, 0, 0),
-            host_pitches=self._pitches_host_x,
-            buffer_pitches=self._pitches_buff,
-            region=(self.gh_x[:1 * s_gh, 0, 0].nbytes,
-                    self.gh_x.shape[1],
-                    self.gh_x.shape[2]),
-            wait_for=evts)
-        set_gh_xl.wait()
-        set_gh_xr.wait()
-        # Y-direction
+        self.field_out.toHost()
+        self.field_out.wait()
+        s_gh = self.gh_out[0]
+        self.field_out.data[0][1 * s_gh:2 * s_gh, :, :] += \
+            self.field_out.data[0][-1 * s_gh:, :, :]
+        self.field_out.data[0][-2 * s_gh:-1 * s_gh, :, :] += \
+            self.field_out.data[0][:1 * s_gh, :, :]
        s_gh = self.gh_out[1]
-        get_gh_yl = cl.enqueue_copy(
+        self.field_out.data[0][:, 1 * s_gh:2 * s_gh, :] += \
-            self.cl_env.queue,
+            self.field_out.data[0][:, -1 * s_gh:, :]
-            self.gh_y, self.field_out.gpu_data[0],
+        self.field_out.data[0][:, -2 * s_gh:-1 * s_gh, :] += \
-            host_origin=(0, 0, 0),
+            self.field_out.data[0][:, :1 * s_gh, :]
-            buffer_origin=(0, 0, 0),
-            host_pitches=self._pitches_host_y,
-            buffer_pitches=self._pitches_buff,
-            region=(self.gh_y[:, 0, 0].nbytes, 2 * s_gh, self.gh_y.shape[2]),
-            wait_for=evts)
-        get_gh_yr = cl.enqueue_copy(
-            self.cl_env.queue,
-            self.gh_y, self.field_out.gpu_data[0],
-            host_origin=(0, 2 * s_gh, 0),
-            buffer_origin=(0, self.field_out.data[0].shape[1] - 2 * s_gh, 0),
-            host_pitches=self._pitches_host_y,
-            buffer_pitches=self._pitches_buff,
-            region=(self.gh_y[:, 0, 0].nbytes, 2 * s_gh, self.gh_y.shape[2]),
-            wait_for=evts)
-        get_gh_yl.wait()
-        get_gh_yr.wait()
-        # Add ghosts contributions in domain layer
-        self.gh_y[:, 2 * s_gh:3 * s_gh, :] += \
-            self.gh_y[:, 0 * s_gh:1 * s_gh, :]
-        self.gh_y[:, 1 * s_gh:2 * s_gh, :] += \
-            self.gh_y[:, 3 * s_gh:4 * s_gh, :]
-        set_gh_yl = cl.enqueue_copy(
-            self.cl_env.queue,
-            self.field_out.gpu_data[0], self.gh_y,
-            host_origin=(0, 1 * s_gh, 0),
-            buffer_origin=(0, 1 * s_gh, 0),
-            host_pitches=self._pitches_host_y,
-            buffer_pitches=self._pitches_buff,
-            region=(self.gh_y[:, 0, 0].nbytes, 1 * s_gh, self.gh_y.shape[2]),
-            wait_for=evts)
-        set_gh_yr = cl.enqueue_copy(
-            self.cl_env.queue,
-            self.field_out.gpu_data[0], self.gh_y,
-            host_origin=(0, 2 * s_gh, 0),
-            buffer_origin=(0, self.field_out.data[0].shape[1] - 2 * s_gh, 0),
-            host_pitches=self._pitches_host_y,
-            buffer_pitches=self._pitches_buff,
-            region=(self.gh_y[:, 0, 0].nbytes, 1 * s_gh, self.gh_y.shape[2]),
-            wait_for=evts)
-        set_gh_yl.wait()
-        set_gh_yr.wait()
-        # Z-direction
        s_gh = self.gh_out[2]
-        get_gh_zl = cl.enqueue_copy(
+        self.field_out.data[0][:, :, 1 * s_gh:2 * s_gh] += \
-            self.cl_env.queue,
+            self.field_out.data[0][:, :, -1 * s_gh:]
-            self.gh_z, self.field_out.gpu_data[0],
+        self.field_out.data[0][:, :, -2 * s_gh:-1 * s_gh] += \
-            host_origin=(0, 0, 0),
+            self.field_out.data[0][:, :, :1 * s_gh]
-            buffer_origin=(0, 0, 0),
+        self.field_out.toDevice()
-            host_pitches=self._pitches_host_z,
-            buffer_pitches=self._pitches_buff,
+        # # Get ghosts values and in-domain layer
-            region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 2 * s_gh),
+        # # X-direction
-            wait_for=evts)
+        # s_gh = self.gh_out[0]
-        get_gh_zr = cl.enqueue_copy(
+        # get_gh_xl = cl.enqueue_copy(
-            self.cl_env.queue,
+        #     self.cl_env.queue,
-            self.gh_z, self.field_out.gpu_data[0],
+        #     self.gh_x, self.field_out.gpu_data[0],
-            host_origin=(0, 0, 2 * s_gh),
+        #     host_origin=(0, 0, 0),
-            buffer_origin=(0, 0, self.field_out.data[0].shape[2] - 2 * s_gh),
+        #     buffer_origin=(0, 0, 0),
-            host_pitches=self._pitches_host_z,
+        #     host_pitches=self._pitches_host_x,
-            buffer_pitches=self._pitches_buff,
+        #     buffer_pitches=self._pitches_buff,
-            region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 2 * s_gh),
+        #     region=(self.gh_x[:2 * s_gh, 0, 0].nbytes,
-            wait_for=evts)
+        #             self.gh_x.shape[1],
-        get_gh_zl.wait()
+        #             self.gh_x.shape[2]),
-        get_gh_zr.wait()
+        #     wait_for=evts)
-        # Add ghosts contributions in domain layer
+        # get_gh_xr = cl.enqueue_copy(
-        self.gh_z[:, :, 2 * s_gh:3 * s_gh] += \
+        #     self.cl_env.queue,
-            self.gh_z[:, :, 0 * s_gh:1 * s_gh]
+        #     self.gh_x, self.field_out.gpu_data[0],
-        self.gh_z[:, :, 1 * s_gh:2 * s_gh] += \
+        #     host_origin=(self.gh_x[:2 * s_gh, 0, 0].nbytes, 0, 0),
-            self.gh_z[:, :, 3 * s_gh:4 * s_gh]
+        #     buffer_origin=(self.field_out.data[0][:, 0, 0].nbytes -
-        set_gh_zl = cl.enqueue_copy(
+        #                    self.gh_x[:2 * s_gh, 0, 0].nbytes, 0, 0),
-            self.cl_env.queue,
+        #     host_pitches=self._pitches_host_x,
-            self.field_out.gpu_data[0], self.gh_z,
+        #     buffer_pitches=self._pitches_buff,
-            host_origin=(0, 0, 1 * s_gh),
+        #     region=(self.gh_x[:2 * s_gh, 0, 0].nbytes,
-            buffer_origin=(0, 0, 1 * s_gh),
+        #             self.gh_x.shape[1],
-            host_pitches=self._pitches_host_z,
+        #             self.gh_x.shape[2]),
-            buffer_pitches=self._pitches_buff,
+        #     wait_for=evts)
-            region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 1 * s_gh),
+        # get_gh_xl.wait()
-            wait_for=evts)
+        # get_gh_xr.wait()
-        set_gh_zr = cl.enqueue_copy(
+        # # Add ghosts contributions in domain layer
-            self.cl_env.queue,
+        # self.gh_x[2 * s_gh:3 * s_gh, :, :] += \
-            self.field_out.gpu_data[0], self.gh_z,
+        #     self.gh_x[0 * s_gh:1 * s_gh, :, :]
-            host_origin=(0, 0, 2 * s_gh),
+        # self.gh_x[1 * s_gh:2 * s_gh, :, :] += \
-            buffer_origin=(0, 0, self.field_out.data[0].shape[2] - 2 * s_gh),
+        #     self.gh_x[3 * s_gh:4 * s_gh, :, :]
-            host_pitches=self._pitches_host_z,
+        # set_gh_xl = cl.enqueue_copy(
-            buffer_pitches=self._pitches_buff,
+        #     self.cl_env.queue,
-            region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 1 * s_gh),
+        #     self.field_out.gpu_data[0], self.gh_x,
-            wait_for=evts)
+        #     host_origin=(self.gh_x[:1 * s_gh, 0, 0].nbytes, 0, 0),
-        set_gh_zl.wait()
+        #     buffer_origin=(self.gh_x[:1 * s_gh, 0, 0].nbytes, 0, 0),
-        set_gh_zr.wait()
+        #     host_pitches=self._pitches_host_x,
+        #     buffer_pitches=self._pitches_buff,
+        #     region=(self.gh_x[:1 * s_gh, 0, 0].nbytes,
+        #             self.gh_x.shape[1],
+        #             self.gh_x.shape[2]),
+        #     wait_for=evts)
+        # set_gh_xr = cl.enqueue_copy(
+        #     self.cl_env.queue,
+        #     self.field_out.gpu_data[0], self.gh_x,
+        #     host_origin=(self.gh_x[:2 * s_gh, 0, 0].nbytes, 0, 0),
+        #     buffer_origin=(self.field_out.data[0][:, 0, 0].nbytes -
+        #                    self.gh_x[:2 * s_gh, 0, 0].nbytes, 0, 0),
+        #     host_pitches=self._pitches_host_x,
+        #     buffer_pitches=self._pitches_buff,
+        #     region=(self.gh_x[:1 * s_gh, 0, 0].nbytes,
+        #             self.gh_x.shape[1],
+        #             self.gh_x.shape[2]),
+        #     wait_for=evts)
+        # set_gh_xl.wait()
+        # set_gh_xr.wait()
+        # # Y-direction
+        # s_gh = self.gh_out[1]
+        # get_gh_yl = cl.enqueue_copy(
+        #     self.cl_env.queue,
+        #     self.gh_y, self.field_out.gpu_data[0],
+        #     host_origin=(0, 0, 0),
+        #     buffer_origin=(0, 0, 0),
+        #     host_pitches=self._pitches_host_y,
+        #     buffer_pitches=self._pitches_buff,
+        #     region=(self.gh_y[:, 0, 0].nbytes, 2 * s_gh, self.gh_y.shape[2]),
+        #     wait_for=evts)
+        # get_gh_yr = cl.enqueue_copy(
+        #     self.cl_env.queue,
+        #     self.gh_y, self.field_out.gpu_data[0],
+        #     host_origin=(0, 2 * s_gh, 0),
+        #     buffer_origin=(0, self.field_out.data[0].shape[1] - 2 * s_gh, 0),
+        #     host_pitches=self._pitches_host_y,
+        #     buffer_pitches=self._pitches_buff,
+        #     region=(self.gh_y[:, 0, 0].nbytes, 2 * s_gh, self.gh_y.shape[2]),
+        #     wait_for=evts)
+        # get_gh_yl.wait()
+        # get_gh_yr.wait()
+        # # Add ghosts contributions in domain layer
+        # self.gh_y[:, 2 * s_gh:3 * s_gh, :] += \
+        #     self.gh_y[:, 0 * s_gh:1 * s_gh, :]
+        # self.gh_y[:, 1 * s_gh:2 * s_gh, :] += \
+        #     self.gh_y[:, 3 * s_gh:4 * s_gh, :]
+        # set_gh_yl = cl.enqueue_copy(
+        #     self.cl_env.queue,
+        #     self.field_out.gpu_data[0], self.gh_y,
+        #     host_origin=(0, 1 * s_gh, 0),
+        #     buffer_origin=(0, 1 * s_gh, 0),
+        #     host_pitches=self._pitches_host_y,
+        #     buffer_pitches=self._pitches_buff,
+        #     region=(self.gh_y[:, 0, 0].nbytes, 1 * s_gh, self.gh_y.shape[2]),
+        #     wait_for=evts)
+        # set_gh_yr = cl.enqueue_copy(
+        #     self.cl_env.queue,
+        #     self.field_out.gpu_data[0], self.gh_y,
+        #     host_origin=(0, 2 * s_gh, 0),
+        #     buffer_origin=(0, self.field_out.data[0].shape[1] - 2 * s_gh, 0),
+        #     host_pitches=self._pitches_host_y,
+        #     buffer_pitches=self._pitches_buff,
+        #     region=(self.gh_y[:, 0, 0].nbytes, 1 * s_gh, self.gh_y.shape[2]),
+        #     wait_for=evts)
+        # set_gh_yl.wait()
+        # set_gh_yr.wait()
+        # # Z-direction
+        # s_gh = self.gh_out[2]
+        # get_gh_zl = cl.enqueue_copy(
+        #     self.cl_env.queue,
+        #     self.gh_z, self.field_out.gpu_data[0],
+        #     host_origin=(0, 0, 0),
+        #     buffer_origin=(0, 0, 0),
+        #     host_pitches=self._pitches_host_z,
+        #     buffer_pitches=self._pitches_buff,
+        #     region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 2 * s_gh),
+        #     wait_for=evts)
+        # get_gh_zr = cl.enqueue_copy(
+        #     self.cl_env.queue,
+        #     self.gh_z, self.field_out.gpu_data[0],
+        #     host_origin=(0, 0, 2 * s_gh),
+        #     buffer_origin=(0, 0, self.field_out.data[0].shape[2] - 2 * s_gh),
+        #     host_pitches=self._pitches_host_z,
+        #     buffer_pitches=self._pitches_buff,
+        #     region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 2 * s_gh),
+        #     wait_for=evts)
+        # get_gh_zl.wait()
+        # get_gh_zr.wait()
+        # # Add ghosts contributions in domain layer
+        # self.gh_z[:, :, 2 * s_gh:3 * s_gh] += \
+        #     self.gh_z[:, :, 0 * s_gh:1 * s_gh]
+        # self.gh_z[:, :, 1 * s_gh:2 * s_gh] += \
+        #     self.gh_z[:, :, 3 * s_gh:4 * s_gh]
+        # set_gh_zl = cl.enqueue_copy(
+        #     self.cl_env.queue,
+        #     self.field_out.gpu_data[0], self.gh_z,
+        #     host_origin=(0, 0, 1 * s_gh),
+        #     buffer_origin=(0, 0, 1 * s_gh),
+        #     host_pitches=self._pitches_host_z,
+        #     buffer_pitches=self._pitches_buff,
+        #     region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 1 * s_gh),
+        #     wait_for=evts)
+        # set_gh_zr = cl.enqueue_copy(
+        #     self.cl_env.queue,
+        #     self.field_out.gpu_data[0], self.gh_z,
+        #     host_origin=(0, 0, 2 * s_gh),
+        #     buffer_origin=(0, 0, self.field_out.data[0].shape[2] - 2 * s_gh),
+        #     host_pitches=self._pitches_host_z,
+        #     buffer_pitches=self._pitches_buff,
+        #     region=(self.gh_z[:, 0, 0].nbytes, self.gh_z.shape[1], 1 * s_gh),
+        #     wait_for=evts)
+        # set_gh_zl.wait()
+        # set_gh_zr.wait()
    def get_profiling_info(self):
        for p in self.fine_to_coarse.profile:

--- a/HySoP/hysop/gpu/tests/test_multiresolutionfilter.py
+++ b/HySoP/hysop/gpu/tests/test_multiresolutionfilter.py
@@ -23,6 +23,7 @@ def func(res, x, y, z, t=0):
 def test_filter_linear():
+    """This test compares the GPU linear filter with python implementation"""
    box = Box(length=L, origin=O)
    f = Field(box, formula=func)
    f_py = Field(box, formula=func)
@@ -34,7 +35,7 @@ def test_filter_linear():
                                       Support: 'gpu', })
    op_py = MultiresolutionFilter(d_in=d_fine, d_out=d_coarse,
                                  variables={f_py: d_coarse},
-                                  method={Remesh: Rmsh_Linear,})
+                                  method={Remesh: Rmsh_Linear, })
    op.discretize()
    op.setup()
    op_py.discretize()
@@ -47,43 +48,30 @@ def test_filter_linear():
    f_in = f.discreteFields[topo_fine]
    f_out = f.discreteFields[topo_coarse]
    valid = f_py.discreteFields[topo_coarse]
-    #valid = [npw.zeros(f_out[0].shape), ]
-    #valid = func(valid, *topo_coarse.mesh.coords)
    f_out[0][...] = 0.
    f_out.toDevice()
-    # f_py.discreteFields[topo_fine][0][...] = 0
+    f_in[0][...] = f_py.discreteFields[topo_fine][0]
-    # f_py.discreteFields[topo_fine][0][64+12,8,8] = 1.0
+    f_in.toDevice()
-    # f_py.discreteFields[topo_fine][0][64+13:64+15,5:7,5:7] = 2.0
-    # f_in[0][...] = f_py.discreteFields[topo_fine][0]
-    # f_in.toDevice()
    op.discrete_op.cl_env.queue.finish()
    op.apply(simu)
    op_py.apply(simu)
    f_out.toHost()
    op.discrete_op.cl_env.queue.finish()
-    # print f_out.data[0].shape, valid[0].shape
+    # e = np.abs(valid[0][topo_coarse.mesh.iCompute] -
-    # print np.where(valid[0][topo_coarse.mesh.iCompute]>0.0001)
+    #            f_out[0][topo_coarse.mesh.iCompute])
-    # print np.where(f_out.data[0][topo_coarse.mesh.iCompute]>0.0001)
+    # print np.max(e), np.where(e>1e-6)[0].shape
-    #print valid[0][topo_coarse.mesh.iCompute][32+4:32+8,2:4,2:4]
+    print np.allclose(valid[0][topo_coarse.mesh.iCompute],
-    # print f_out.data[0][topo_coarse.mesh.iCompute][36:40,4:8,4:8]
+                      f_out[0][topo_coarse.mesh.iCompute])
-    # err = valid[0] - f_out[0]
-    # print "MAX X", np.max(f_out[0][0,:,:]), np.max(f_out[0][-1,:,:])
-    #print "MAX Y", np.max(f_out[0][:,0,:]), np.max(f_out[0][:,-1,:])
-    #print "MAX vY", np.max(valid[0][:,1,:]), np.max(valid[0][:,-2,:])
-    # print "MAX Z", np.max(f_out[0][:,:,0]), np.max(f_out[0][:,:,-1])
-    # print np.where(err[:2,:,:] > 0.0001)
-    # print err[:3,-4:,-4:]
-    e = np.max(np.abs(valid[0][topo_coarse.mesh.iCompute] -
-                      f_out[0][topo_coarse.mesh.iCompute]))
-    print e
-    assert np.allclose(valid[0][topo_coarse.mesh.iCompute],
-                       f_out[0][topo_coarse.mesh.iCompute])
    op.profiler.summarize()
    print op.profiler
 def test_filter_L2_1():
+    """
+    This test compares the GPU L2_1 filter with the expected result
+    on the coarse grid and with python implementation.
+    """
    box = Box(length=L, origin=O)
    f = Field(box, formula=func)
    #f_py = Field(box, formula=func)
@@ -110,51 +98,16 @@ def test_filter_L2_1():
    #valid = f_py.discreteFields[topo_coarse]
    valid = [npw.zeros(f_out[0].shape), ]
    valid = func(valid, *topo_coarse.mesh.coords)
-    #valid[0][...] = 0.
    f_out[0][...] = 0.
    f_out.toDevice()
-    #f_py.discreteFields[topo_fine][0][1,1,1] = 1.0
-    #f_py.discreteFields[topo_fine][0][-1,-1,-1] = 2.0
-    #f_py.discreteFields[topo_fine][0][511,8+1,8+1] = 1.0
-    #f_py.discreteFields[topo_fine][0][-4:,:,:] = 1.0
-    # f_py.discreteFields[topo_fine][0][64+13:64+15,5:7,5:7] = 2.0
-    #f_in[0][...] = f_py.discreteFields[topo_fine][0]
-    #f_in.toDevice()
    op.discrete_op.cl_env.queue.finish()
    op.apply(simu)
    #op_py.apply(simu)
    f_out.toHost()
    op.discrete_op.cl_env.queue.finish()
-    # print f_out.data[0].shape, valid[0].shape
-    # print np.where(valid[0][topo_coarse.mesh.iCompute]>0.0001)
-    # print np.where(f_out.data[0][topo_coarse.mesh.iCompute]>0.0001)
-    #print valid[0][topo_coarse.mesh.iCompute][32+4:32+8,2:4,2:4]
-    #print f_out.data[0][topo_coarse.mesh.iCompute][:2,3:7,3:7] - valid[0][topo_coarse.mesh.iCompute][:2,3:7,3:7]
-    # err = valid[0] - f_out[0]
-    #print "MAX X", np.max(f_out[0][:2,:,:]), np.max(f_out[0][-2,:,:])
-    #print "MAX X", np.max(valid[0][:2,:,:]), np.max(valid[0][-2,:,:])
-    #print "MAX Y", np.max(f_out[0][:,0,:]), np.max(f_out[0][:,-1,:])
-    #print "MAX vY", np.max(valid[0][:,1,:]), np.max(valid[0][:,-2,:])
-    # print "MAX Z", np.max(f_out[0][:,:,0]), np.max(f_out[0][:,:,-1])
-    # print np.where(err[:2,:,:] > 0.0001)
-    # print err[:3,-4:,-4:]
-    # print "MAX x", np.max(f_out[0][:2,:,:]), np.max(valid[0][:2,:,:])
-    # print "MAX y", np.max(f_out[0][:,:2,:]), np.max(valid[0][:,:2,:])
-    # print "MAX z", np.max(f_out[0][:,:,:2]), np.max(valid[0][:,:,:2])
-    # print f_out[0][10,10,:6]
-    # print f_out[0][10,10,-6:]
-    # print np.where(np.abs(f_out[0]) > 0.00001)
-    #print valid[0][:2,:4,:4]
-    #print f_out[0][-2:,:4,:4] - valid[0][-2:,:4,:4]
-    #print f_out[0][:2,-4:,:4] - valid[0][:2,-4:,:4]
-    #print f_out[0][:2,:4,-4:] - valid[0][:2,:4,-4:]
-    #print valid[0][topo_coarse.mesh.iCompute][0,1:-1,1:-1] - f_out[0][topo_coarse.mesh.iCompute][0,1:-1,1:-1]
    # e = np.abs(valid[0][topo_coarse.mesh.iCompute] -
    #            f_out[0][topo_coarse.mesh.iCompute])
-    # print np.max(e), np.where(e>1e-7), np.where(e>1e-7)[0].shape
+    # print np.max(e), np.where(e>1e-6)[0].shape
-    ## PB DE CUMLUL DES GHPSTS DANS LES COINS DE DIRECTION EN DIRECTIO?
-    #print np.where(np.abs(valid[0][topo_coarse.mesh.iCompute][:,0,1:-1] - \
-    #             f_out[0][topo_coarse.mesh.iCompute][:,0,1:-1]) > 0.001)
    print np.allclose(valid[0][topo_coarse.mesh.iCompute],
                      f_out[0][topo_coarse.mesh.iCompute])
@@ -163,5 +116,5 @@ def test_filter_L2_1():
 if __name__ == '__main__':
-    #test_filter_linear()
+    test_filter_linear()
    test_filter_L2_1()