From d041573029c1adb43b5a445f8679cc3bb814eb4a Mon Sep 17 00:00:00 2001 From: Jean-Matthieu Etancelin <jean-matthieu.etancelin@imag.fr> Date: Mon, 3 Sep 2012 15:55:34 +0000 Subject: [PATCH] Fix using domains of non [0,0,0] origin. --- HySoP/hysop/fields/topology.py | 9 +-- HySoP/hysop/operator/advection_d.py | 4 +- HySoP/hysop/operator/remeshing.py | 4 +- HySoP/hysop/particular_solvers/gpu.py | 2 +- HySoP/hysop/particular_solvers/gpu_src.cl | 85 +++++++++++------------ 5 files changed, 54 insertions(+), 50 deletions(-) diff --git a/HySoP/hysop/fields/topology.py b/HySoP/hysop/fields/topology.py index c57cc2b18..d993b7197 100644 --- a/HySoP/hysop/fields/topology.py +++ b/HySoP/hysop/fields/topology.py @@ -65,7 +65,7 @@ class CartesianTopology(object): localResolution[i] = nbpoints[i] + remainingPoints[i] start = np.zeros((domain.dimension), dtype=PARMES_INTEGER) start[:self.dim] = self.coords * nbpoints - self.mesh = LocalMesh(self.rank, resolution=localResolution, start=start, dom_size=self.domain.length / self.resolution, ghosts=self.ghosts) + self.mesh = LocalMesh(self.rank, resolution=localResolution, start=start, dom_size=self.domain.length / self.resolution,dom_origin=self.domain.origin, ghosts=self.ghosts) def __str__(self): """ Topology info display """ @@ -117,7 +117,7 @@ class FFTTopology(object): class LocalMesh(object): """ """ - def __init__(self, rank, resolution, start, dom_size, ghosts=np.zeros((3))): + def __init__(self, rank, resolution, start, dom_size, dom_origin, ghosts=np.zeros((3))): # Current process rank in the topology that owns this mesh self.rank = rank # Local resolution @@ -129,8 +129,9 @@ class LocalMesh(object): # Mesh step size self.size = dom_size # Mesh coordinates - coord_start = self.start * self.size - coord_end = (self.end + 1) * self.size + coord_start = self.start * self.size + dom_origin + self.origin = coord_start + coord_end = (self.end + 1) * self.size + dom_origin if self.start.size == 3: self.coords = np.ogrid[coord_start[0]:coord_end[0]:self.size[0], coord_start[1]:coord_end[1]:self.size[1], diff --git a/HySoP/hysop/operator/advection_d.py b/HySoP/hysop/operator/advection_d.py index 090c695ec..a291e6673 100644 --- a/HySoP/hysop/operator/advection_d.py +++ b/HySoP/hysop/operator/advection_d.py @@ -68,7 +68,9 @@ class Advection_P(DiscreteOperator): # Advection evt = self.numMethod.launch(self.velocity.gpu_data[splittingDirection], self.res_position.gpu_data, - PARMES_REAL_GPU(dt)) + PARMES_REAL_GPU(dt), + PARMES_REAL_GPU(self.scalar.domain.origin[splittingDirection]), + PARMES_REAL_GPU(self.scalar.domain.step[splittingDirection])) for df in self.output: df.contains_data = False self.numMethod.queue.finish() diff --git a/HySoP/hysop/operator/remeshing.py b/HySoP/hysop/operator/remeshing.py index 779aa93db..2b84c868c 100644 --- a/HySoP/hysop/operator/remeshing.py +++ b/HySoP/hysop/operator/remeshing.py @@ -53,7 +53,9 @@ class Remeshing(DiscreteOperator): ## Launching kernel evt = self.numMethod.launch(self.ppos.gpu_data, self.pscal.gpu_data, - self.res_scalar.gpu_data) + self.res_scalar.gpu_data, + PARMES_REAL_GPU(self.res_scalar.domain.origin[splittingDirection]), + PARMES_REAL_GPU(self.res_scalar.domain.step[splittingDirection])) for df in self.output: df.contains_data = False self.numMethod.queue.finish() diff --git a/HySoP/hysop/particular_solvers/gpu.py b/HySoP/hysop/particular_solvers/gpu.py index a8dd50c8a..80f60c296 100644 --- a/HySoP/hysop/particular_solvers/gpu.py +++ b/HySoP/hysop/particular_solvers/gpu.py @@ -183,7 +183,7 @@ class GPUParticleSolver(ParticleSolver): dim = df.topology.dim coord_min = np.ones(4, dtype=PARMES_REAL_GPU) mesh_size = np.ones(4, dtype=PARMES_REAL_GPU) - coord_min[0:dim] = df.topology.mesh.start + coord_min[0:dim] = df.topology.mesh.origin mesh_size[0:dim] = df.topology.mesh.size if f.vector: if dim == 3: diff --git a/HySoP/hysop/particular_solvers/gpu_src.cl b/HySoP/hysop/particular_solvers/gpu_src.cl index 71517b5ea..6cc51af57 100644 --- a/HySoP/hysop/particular_solvers/gpu_src.cl +++ b/HySoP/hysop/particular_solvers/gpu_src.cl @@ -185,13 +185,13 @@ __kernel void advec_init_transpose_3D_02(__global const float* input, /* Advection Basique */ __kernel void advection(__global const float* gvelo, __global float* ppos, - float dt) + float dt, float min_position, float dx) { uint gidX = get_global_id(0); uint gidY = get_global_id(1); uint gidZ = get_global_id(2); int ind, i_ind, i_ind_p; - float dx = 1.0f/(1.0f*WIDTH) ,invdx = (1.0f*WIDTH); + float invdx = 1.0f/dx; uint i; float v, y, p; for(i=gidX; i<WIDTH; i+=WGN) @@ -202,7 +202,7 @@ __kernel void advection(__global const float* gvelo, y = (p - convert_float(ind) * dx) * invdx; i_ind = ((ind + WIDTH) % WIDTH); i_ind_p = ((ind + 1) % WIDTH); - p=(gvelo[i_ind+gidY*WIDTH+gidZ*WIDTH*WIDTH]*(1.0f - y) + gvelo[i_ind_p+gidY*WIDTH+gidZ*WIDTH*WIDTH]*y)*dt + i*dx; + p=(gvelo[i_ind+gidY*WIDTH+gidZ*WIDTH*WIDTH]*(1.0f - y) + gvelo[i_ind_p+gidY*WIDTH+gidZ*WIDTH*WIDTH]*y)*dt + i*dx + min_position; ppos[i+gidY*WIDTH+gidZ*WIDTH*WIDTH] = p; } } @@ -218,13 +218,13 @@ Contraintes : */ __kernel void advection(__global const float* gvelo, __global float* ppos, - float dt) + float dt, float min_position, float dx) { uint gidX = get_global_id(0); uint gidY = get_global_id(1); uint gidZ = get_global_id(2); int4 ind, i_ind, i_ind_p; - float dx = 1.0f/(1.0f*WIDTH) ,invdx = (1.0f*WIDTH); + float invdx = 1.0f/dx; uint i; float4 v, vp, y, p, coord; __local float velocity_cache[WIDTH]; @@ -251,7 +251,7 @@ __kernel void advection(__global const float* gvelo, i_ind_p = ((i_ind + 1) % WIDTH); v = (float4)(velocity_cache[i_ind.x], velocity_cache[i_ind.y], velocity_cache[i_ind.z], velocity_cache[i_ind.w]); vp = (float4)(velocity_cache[i_ind_p.x], velocity_cache[i_ind_p.y], velocity_cache[i_ind_p.z], velocity_cache[i_ind_p.w]); - p = (v*(1.0f - y) + vp*y)*dt + coord; + p = (v*(1.0f - y) + vp*y)*dt + coord + (float4)(min_position); vstore4(p, (i+gidY*WIDTH+gidZ*WIDTH*WIDTH)/4, ppos); } } @@ -262,12 +262,12 @@ __kernel void advection(__global const float* gvelo, /* Remaillage basique */ __kernel void remeshing(__global const float* ppos, __global const float* pscal, - __global float* gscal) + __global float* gscal, float min_position, float dx) { uint gidX = get_global_id(0); uint gidY = get_global_id(1); uint gidZ = get_global_id(2); - float dx = 1.0f/(1.0f*WIDTH) ,invdx = (1.0f*WIDTH); + float invdx = 1.0f/dx; int ind; uint i, nb_part = WIDTH/WGN; float p, s, y; @@ -280,7 +280,7 @@ __kernel void remeshing(__global const float* ppos, barrier(CLK_LOCAL_MEM_FENCE); for(i=gidX*nb_part; i<(gidX + 1)*nb_part; i+=1) { - p = ppos[(i + gidY*WIDTH + gidZ*WIDTH*WIDTH)]; + p = ppos[(i + gidY*WIDTH + gidZ*WIDTH*WIDTH)] - min_position; ind = convert_int_rtn(p * invdx); y = (p - convert_float(ind) * dx) * invdx; s = pscal[(i + gidY*(WIDTH+PADDING) + gidZ*(WIDTH+PADDING)*(WIDTH+PADDING))]; @@ -313,16 +313,15 @@ Contraintes : */ __kernel void remeshing(__global const float* ppos, __global const float* pscal, - __global float* gscal) + __global float* gscal, float min_position, float dx) { uint gidX = get_global_id(0); uint gidY = get_global_id(1); uint gidZ = get_global_id(2); - float dx = 1.0f/(1.0f*WIDTH) ,invdx = (1.0f*WIDTH); - int ind; - int4 ind4; + float invdx = 1.0f/dx; + int4 ind; uint i, nb_part = WIDTH/WGN; - float4 p4, s4, y4; + float4 p, s, y; uint4 index4; __local float gscal_loc[WIDTH]; @@ -332,40 +331,40 @@ __kernel void remeshing(__global const float* ppos, barrier(CLK_LOCAL_MEM_FENCE); for(i=gidX*nb_part; i<(gidX + 1)*nb_part; i+=4) { - p4 = vload4((i + gidY*WIDTH + gidZ*WIDTH*WIDTH)/4, ppos); - s4 = vload4((i + gidY*(WIDTH+PADDING) + gidZ*(WIDTH+PADDING)*(WIDTH+PADDING))/4, pscal); - ind4 = convert_int4_rtn(p4 * invdx); - y4 = (p4 - convert_float4(ind4) * dx) * invdx; - index4 = convert_uint4((ind4 - 2 + WIDTH) % WIDTH); - gscal_loc[index4.x] += (alpha(y4.x, s4.x)); - gscal_loc[index4.y] += (alpha(y4.y, s4.y)); - gscal_loc[index4.z] += (alpha(y4.z, s4.z)); - gscal_loc[index4.w] += (alpha(y4.w, s4.w)); + p = vload4((i + gidY*WIDTH + gidZ*WIDTH*WIDTH)/4, ppos) - (float4)(min_position); + s = vload4((i + gidY*(WIDTH+PADDING) + gidZ*(WIDTH+PADDING)*(WIDTH+PADDING))/4, pscal); + ind = convert_int4_rtn(p * invdx); + y = (p - convert_float4(ind) * dx) * invdx; + index4 = convert_uint4((ind - 2 + WIDTH) % WIDTH); + gscal_loc[index4.x] += (alpha(y.x, s.x)); + gscal_loc[index4.y] += (alpha(y.y, s.y)); + gscal_loc[index4.z] += (alpha(y.z, s.z)); + gscal_loc[index4.w] += (alpha(y.w, s.w)); index4 = (index4 + 1) % WIDTH; - gscal_loc[index4.x] += (beta(y4.x,s4.x)); - gscal_loc[index4.y] += (beta(y4.y,s4.y)); - gscal_loc[index4.z] += (beta(y4.z,s4.z)); - gscal_loc[index4.w] += (beta(y4.w,s4.w)); + gscal_loc[index4.x] += (beta(y.x,s.x)); + gscal_loc[index4.y] += (beta(y.y,s.y)); + gscal_loc[index4.z] += (beta(y.z,s.z)); + gscal_loc[index4.w] += (beta(y.w,s.w)); index4 = (index4 + 1) % WIDTH; - gscal_loc[index4.x] += (gamma(y4.x, s4.x)); - gscal_loc[index4.y] += (gamma(y4.y, s4.y)); - gscal_loc[index4.z] += (gamma(y4.z, s4.z)); - gscal_loc[index4.w] += (gamma(y4.w, s4.w)); + gscal_loc[index4.x] += (gamma(y.x, s.x)); + gscal_loc[index4.y] += (gamma(y.y, s.y)); + gscal_loc[index4.z] += (gamma(y.z, s.z)); + gscal_loc[index4.w] += (gamma(y.w, s.w)); index4 = (index4 + 1) % WIDTH; - gscal_loc[index4.x] += (delta(y4.x, s4.x)); - gscal_loc[index4.y] += (delta(y4.y, s4.y)); - gscal_loc[index4.z] += (delta(y4.z, s4.z)); - gscal_loc[index4.w] += (delta(y4.w, s4.w)); + gscal_loc[index4.x] += (delta(y.x, s.x)); + gscal_loc[index4.y] += (delta(y.y, s.y)); + gscal_loc[index4.z] += (delta(y.z, s.z)); + gscal_loc[index4.w] += (delta(y.w, s.w)); index4 = (index4 + 1) % WIDTH; - gscal_loc[index4.x] += (eta(y4.x, s4.x)); - gscal_loc[index4.y] += (eta(y4.y, s4.y)); - gscal_loc[index4.z] += (eta(y4.z, s4.z)); - gscal_loc[index4.w] += (eta(y4.w, s4.w)); + gscal_loc[index4.x] += (eta(y.x, s.x)); + gscal_loc[index4.y] += (eta(y.y, s.y)); + gscal_loc[index4.z] += (eta(y.z, s.z)); + gscal_loc[index4.w] += (eta(y.w, s.w)); index4 = (index4 + 1) % WIDTH; - gscal_loc[index4.x] += (zeta(y4.x, s4.x)); - gscal_loc[index4.y] += (zeta(y4.y, s4.y)); - gscal_loc[index4.z] += (zeta(y4.z, s4.z)); - gscal_loc[index4.w] += (zeta(y4.w, s4.w)); + gscal_loc[index4.x] += (zeta(y.x, s.x)); + gscal_loc[index4.y] += (zeta(y.y, s.y)); + gscal_loc[index4.z] += (zeta(y.z, s.z)); + gscal_loc[index4.w] += (zeta(y.w, s.w)); } barrier(CLK_LOCAL_MEM_FENCE); for(i=gidX*4; i<WIDTH; i+=(WGN*4)) -- GitLab