Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
hysop
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
particle_methods
hysop
Commits
631377d8
Commit
631377d8
authored
10 years ago
by
Jean-Matthieu Etancelin
Committed by
Franck Pérignon
10 years ago
Browse files
Options
Downloads
Patches
Plain Diff
Multi-GPU:ok (remaillage)
parent
7fa3d2e7
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl
+0
-382
0 additions, 382 deletions
HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl
HySoP/hysop/gpu/multi_gpu_particle_advection.py
+15
-67
15 additions, 67 deletions
HySoP/hysop/gpu/multi_gpu_particle_advection.py
with
15 additions
and
449 deletions
HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl
+
0
−
382
View file @
631377d8
...
...
@@ -177,385 +177,3 @@ float y; /* Normalized distance to nearest left grid point */
buffer_r[lid
+
gidY*BUFF_WIDTH
+
gidZ*BUFF_WIDTH*NB_II]
=
r_buff_loc[lid]
;
}
__kernel
void
buff_remesh_in
(
__global
const
float*
ppos,
__global
const
float*
pscal,
__global
float*
gscal,
__constant
struct
AdvectionMeshInfo*
mesh
)
{
int
lid
=
get_local_id
(
0
)
; /* OpenCL work-itme global index (X) */
int
gidY
=
get_global_id
(
1
)
; /* OpenCL work-itme global index (Y) */
int
gidZ
=
get_global_id
(
2
)
; /* OpenCL work-itme global index (Z) */
int
i
; /* Particle index in 1D problem */
float
p
; /* Particle position */
float
s
; /* Particle scalar */
float
y
; /* Normalized distance to nearest left grid point */
int
ind
; /* Integer coordinate */
int
index
; /* Remeshing index */
float
w
;
uint
line_index
=
gidY*NB_I+
gidZ*NB_I*NB_II
; /* Current 1D problem index */
__local
float
gscal_loc[NB_I]
;
/*
//
Initialize
buffers
*/
/*
if
(
lid
<
BUFF_WIDTH
)
*/
/*
l_buff_loc[lid]
=
0.0
; */
/*
if
(
lid
<
BUFF_WIDTH
)
*/
/*
r_buff_loc[lid]
=
0.0
; */
for
(
i=lid
; i<NB_I; i+=WI_NB)
{
/*
Initialize
result
buffer
*/
gscal_loc[i]
=
0.0
;
}
/*
Synchronize
work-group
*/
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
i=lid*PART_NB_PER_WI
; i<(lid + 1)*PART_NB_PER_WI; i+=1)
{
/*
Read
particle
position
*/
p
=
ppos[i
+
line_index]
;
/*
Read
particle
scalar
*/
s
=
pscal[i
+
line_index]
;
/*
Remesh
particle
*/
ind
=
convert_int_rtn
(
p
*
mesh->invdx
)
;
y
=
(
p
-
convert_float
(
ind
)
*
mesh->dx.x
)
*
mesh->invdx
;
index
=
ind
-
REMESH_SHIFT
;
w
=
REMESH
(
alpha
)(
y
)
;
//
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
;
w
=
w
*
s
;
//
(
*loc_ptr
)
+=
w
;
if
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
gscal_loc[noBC_id
(
index-START_INDEX
)
]
+=
w
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
index
=
index
+
1
;
w
=
REMESH
(
beta
)(
y
)
;
//loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
;
w
=
w
*
s
;
//
(
*loc_ptr
)
+=
w
;
if
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
gscal_loc[noBC_id
(
index-START_INDEX
)
]
+=
w
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
index
=
index
+
1
;
w
=
REMESH
(
gamma
)(
y
)
;
//loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
;
w
=
w
*
s
;
//
(
*loc_ptr
)
+=
w
;
if
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
gscal_loc[noBC_id
(
index-START_INDEX
)
]
+=
w
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
index
=
index
+
1
;
w
=
REMESH
(
delta
)(
y
)
;
//loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
;
w
=
w
*
s
;
//
(
*loc_ptr
)
+=
w
;
if
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
gscal_loc[noBC_id
(
index-START_INDEX
)
]
+=
w
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
/*
#
if
REMESH_SHIFT
>
1
*/
/*
index
=
index
+
1
; */
/*
w
=
REMESH
(
eta
)(
y
)
; */
/*
//loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
; */
/*
w
=
w
*
s
; */
/*
//
(
*loc_ptr
)
+=
w
; */
/*
if
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
*/
/*
gscal_loc[noBC_id
(
index-START_INDEX
)
]
+=
w
; */
/*
barrier
(
CLK_LOCAL_MEM_FENCE
)
; */
/*
index
=
index
+
1
; */
/*
w
=
REMESH
(
zeta
)(
y
)
; */
/*
//loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
; */
/*
w
=
w
*
s
; */
/*
//
(
*loc_ptr
)
+=
w
; */
/*
if
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
*/
/*
gscal_loc[noBC_id
(
index-START_INDEX
)
]
+=
w
; */
/*
barrier
(
CLK_LOCAL_MEM_FENCE
)
; */
/*
#
endif
*/
/*
#
if
REMESH_SHIFT
>
2
*/
/*
index
=
index
+
1
; */
/*
w
=
REMESH
(
theta
)(
y
)
; */
/*
loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
; */
/*
w
=
w
*
s
; */
/*
(
*loc_ptr
)
+=
w
; */
/*
barrier
(
CLK_LOCAL_MEM_FENCE
)
; */
/*
index
=
index
+
1
; */
/*
w
=
REMESH
(
iota
)(
y
)
; */
/*
loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
; */
/*
w
=
w
*
s
; */
/*
(
*loc_ptr
)
+=
w
; */
/*
barrier
(
CLK_LOCAL_MEM_FENCE
)
; */
/*
#
endif
*/
/*
#
if
REMESH_SHIFT
>
3
*/
/*
index
=
index
+
1
; */
/*
w
=
REMESH
(
kappa
)(
y
)
; */
/*
loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
; */
/*
w
=
w
*
s
; */
/*
(
*loc_ptr
)
+=
w
; */
/*
barrier
(
CLK_LOCAL_MEM_FENCE
)
; */
/*
index
=
index
+
1
; */
/*
w
=
REMESH
(
mu
)(
y
)
; */
/*
loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
; */
/*
w
=
w
*
s
; */
/*
(
*loc_ptr
)
+=
w
; */
/*
barrier
(
CLK_LOCAL_MEM_FENCE
)
; */
/*
#
endif
*/
}
/*
Synchronize
work-group
*/
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
i=lid
; i<NB_I; i+=WI_NB)
{
/*
Store
result
*/
gscal[i
+
line_index]
=
gscal_loc[noBC_id
(
i
)
]
;
}
/*
//
Store
buffers
*/
/*
if
(
lid
<
BUFF_WIDTH
)
*/
/*
buffer_l[lid
+
gidY*BUFF_WIDTH
+
gidZ*BUFF_WIDTH*NB_II]
=
l_buff_loc[lid]
; */
/*
if
(
lid
<
BUFF_WIDTH
)
*/
/*
buffer_r[lid
+
gidY*BUFF_WIDTH
+
gidZ*BUFF_WIDTH*NB_II]
=
r_buff_loc[lid]
; */
}
__kernel
void
buff_remesh_out
(
__global
const
float*
ppos,
__global
const
float*
pscal,
__global
float*
buffer_l,
__global
float*
buffer_r,
__constant
struct
AdvectionMeshInfo*
mesh
)
{
int
lid
=
get_local_id
(
0
)
; /* OpenCL work-itme global index (X) */
int
gidY
=
get_global_id
(
1
)
; /* OpenCL work-itme global index (Y) */
int
gidZ
=
get_global_id
(
2
)
; /* OpenCL work-itme global index (Z) */
int
i
; /* Particle index in 1D problem */
float
p
; /* Particle position */
float
s
; /* Particle scalar */
float
y
; /* Normalized distance to nearest left grid point */
int
ind
; /* Integer coordinate */
int
index
; /* Remeshing index */
float
w
;
uint
line_index
=
gidY*NB_I+
gidZ*NB_I*NB_II
; /* Current 1D problem index */
__local
float
l_buff_loc[BUFF_WIDTH]
;
__local
float
r_buff_loc[BUFF_WIDTH]
;
__local
float*
loc_ptr
;
//
Initialize
buffers
if
(
lid
<
BUFF_WIDTH
)
l_buff_loc[lid]
=
0.0
;
if
(
lid
<
BUFF_WIDTH
)
r_buff_loc[lid]
=
0.0
;
/*
for
(
i=lid
; i<NB_I; i+=WI_NB) */
/*
{
*/
/*
/\*
Initialize
result
buffer
*\/
*/
/*
gscal_loc[i]
=
0.0
; */
/*
}
*/
/*
Synchronize
work-group
*/
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
i=lid*PART_NB_PER_WI
; i<(lid + 1)*PART_NB_PER_WI; i+=1)
{
/*
Read
particle
position
*/
p
=
ppos[i
+
line_index]
;
/*
Read
particle
scalar
*/
s
=
pscal[i
+
line_index]
;
/*
Remesh
particle
*/
ind
=
convert_int_rtn
(
p
*
mesh->invdx
)
;
y
=
(
p
-
convert_float
(
ind
)
*
mesh->dx.x
)
*
mesh->invdx
;
index
=
ind
-
REMESH_SHIFT
;
w
=
REMESH
(
alpha
)(
y
)
;
//loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
;
w
=
w
*
s
;
//
(
*loc_ptr
)
+=
w
;
if
(
index<START_INDEX
)
l_buff_loc[index-
(
START_INDEX-1-BUFF_WIDTH+1
)
]
+=
w
;
if
(
index>STOP_INDEX
)
r_buff_loc[index-
(
STOP_INDEX+1
)
]
+=
w
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
index
=
index
+
1
;
w
=
REMESH
(
beta
)(
y
)
;
//loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
;
w
=
w
*
s
;
//
(
*loc_ptr
)
+=
w
;
if
(
index<START_INDEX
)
l_buff_loc[index-
(
START_INDEX-1-BUFF_WIDTH+1
)
]
+=
w
;
if
(
index>STOP_INDEX
)
r_buff_loc[index-
(
STOP_INDEX+1
)
]
+=
w
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
index
=
index
+
1
;
w
=
REMESH
(
gamma
)(
y
)
;
//loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
;
w
=
w
*
s
;
//
(
*loc_ptr
)
+=
w
;
if
(
index<START_INDEX
)
l_buff_loc[index-
(
START_INDEX-1-BUFF_WIDTH+1
)
]
+=
w
;
if
(
index>STOP_INDEX
)
r_buff_loc[index-
(
STOP_INDEX+1
)
]
+=
w
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
index
=
index
+
1
;
w
=
REMESH
(
delta
)(
y
)
;
//loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
;
w
=
w
*
s
;
//
(
*loc_ptr
)
+=
w
;
if
(
index<START_INDEX
)
l_buff_loc[index-
(
START_INDEX-1-BUFF_WIDTH+1
)
]
+=
w
;
if
(
index>STOP_INDEX
)
r_buff_loc[index-
(
STOP_INDEX+1
)
]
+=
w
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
/*
#
if
REMESH_SHIFT
>
1
*/
/*
index
=
index
+
1
; */
/*
w
=
REMESH
(
eta
)(
y
)
; */
/*
loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
; */
/*
w
=
w
*
s
; */
/*
(
*loc_ptr
)
+=
w
; */
/*
barrier
(
CLK_LOCAL_MEM_FENCE
)
; */
/*
index
=
index
+
1
; */
/*
w
=
REMESH
(
zeta
)(
y
)
; */
/*
loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
; */
/*
w
=
w
*
s
; */
/*
(
*loc_ptr
)
+=
w
; */
/*
barrier
(
CLK_LOCAL_MEM_FENCE
)
; */
/*
#
endif
*/
/*
#
if
REMESH_SHIFT
>
2
*/
/*
index
=
index
+
1
; */
/*
w
=
REMESH
(
theta
)(
y
)
; */
/*
loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
; */
/*
w
=
w
*
s
; */
/*
(
*loc_ptr
)
+=
w
; */
/*
barrier
(
CLK_LOCAL_MEM_FENCE
)
; */
/*
index
=
index
+
1
; */
/*
w
=
REMESH
(
iota
)(
y
)
; */
/*
loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
; */
/*
w
=
w
*
s
; */
/*
(
*loc_ptr
)
+=
w
; */
/*
barrier
(
CLK_LOCAL_MEM_FENCE
)
; */
/*
#
endif
*/
/*
#
if
REMESH_SHIFT
>
3
*/
/*
index
=
index
+
1
; */
/*
w
=
REMESH
(
kappa
)(
y
)
; */
/*
loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
; */
/*
w
=
w
*
s
; */
/*
(
*loc_ptr
)
+=
w
; */
/*
barrier
(
CLK_LOCAL_MEM_FENCE
)
; */
/*
index
=
index
+
1
; */
/*
w
=
REMESH
(
mu
)(
y
)
; */
/*
loc_ptr
=
(
index>=START_INDEX
&&
index
<=
STOP_INDEX
)
?
gscal_loc
+noBC_id
(
index-START_INDEX
)
:
(
(
index<START_INDEX
)
?
l_buff_loc+index-
(
START_INDEX-1-BUFF_WIDTH+1
)
:
r_buff_loc
+
index-
(
STOP_INDEX+1
)
)
; */
/*
w
=
w
*
s
; */
/*
(
*loc_ptr
)
+=
w
; */
/*
barrier
(
CLK_LOCAL_MEM_FENCE
)
; */
/*
#
endif
*/
}
/*
Synchronize
work-group
*/
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
/*
for
(
i=lid
; i<NB_I; i+=WI_NB) */
/*
{
*/
/*
/\*
Store
result
*\/
*/
/*
gscal[i
+
line_index]
=
gscal_loc[noBC_id
(
i
)
]
; */
/*
}
*/
//
Store
buffers
if
(
lid
<
BUFF_WIDTH
)
buffer_l[lid
+
gidY*BUFF_WIDTH
+
gidZ*BUFF_WIDTH*NB_II]
=
l_buff_loc[lid]
;
if
(
lid
<
BUFF_WIDTH
)
buffer_r[lid
+
gidY*BUFF_WIDTH
+
gidZ*BUFF_WIDTH*NB_II]
=
r_buff_loc[lid]
;
}
This diff is collapsed.
Click to expand it.
HySoP/hysop/gpu/multi_gpu_particle_advection.py
+
15
−
67
View file @
631377d8
...
...
@@ -26,7 +26,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
@debug
def
__init__
(
self
,
platform_id
=
None
,
device_id
=
None
,
device_type
=
None
,
user_src
=
None
,
max_cfl
=
5
,
is_multipass
=
False
,
**
kwds
):
user_src
=
None
,
max_cfl
=
5
,
**
kwds
):
"""
Create a Advection operator.
Work on a given field (scalar or vector) at a given velocity to compute
...
...
@@ -53,7 +53,6 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
# Work-item number for reductions
self
.
_wi_nb_reduce
=
32
self
.
is_multipass
=
is_multipass
super
(
MultiGPUParticleAdvection
,
self
).
__init__
(
platform_id
=
platform_id
,
device_id
=
device_id
,
...
...
@@ -284,22 +283,12 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
'
kernels/minmax_buffers.cl
'
]
prg
=
self
.
cl_env
.
build_src
(
src
,
build_options
,
1
)
#self.num_reduce_stage1_rmsh = KernelLauncher(
# prg.reduce_stage1_rmsh, self.cl_env.queue,
# (self._wi_nb_reduce, 1, int(self.resol_dir[2])),
# (self._wi_nb_reduce, 1, 1))
self
.
num_reduce_stage2
=
KernelLauncher
(
prg
.
reduce_stage2
,
self
.
cl_env
.
queue
,
(
self
.
_wi_nb_reduce
,
1
,
1
),
(
self
.
_wi_nb_reduce
,
1
,
1
))
self
.
num_remesh
=
KernelLauncher
(
prg
.
buff_remesh
,
self
.
cl_env
.
queue
,
gwi
,
lwi
)
self
.
num_remesh_in
=
KernelLauncher
(
prg
.
buff_remesh_in
,
self
.
cl_env
.
queue
,
gwi
,
lwi
)
self
.
num_remesh_out
=
KernelLauncher
(
prg
.
buff_remesh_out
,
self
.
cl_env
.
queue
,
gwi
,
lwi
)
self
.
_temp_minmax
=
npw
.
int_zeros
((
self
.
resol_dir
[
2
]
*
12
))
self
.
_global_minmax
=
npw
.
int_zeros
((
12
))
...
...
@@ -336,26 +325,6 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
self
.
_other_shape_l
=
npw
.
int_zeros
((
self
.
dim
,
))
self
.
_other_shape_r
=
npw
.
int_zeros
((
self
.
dim
,
))
# def _local_buffer_allocations(self):
# """Local memory array for kernels"""
# if not self._is_cut_dir:
# super(MultiGPUParticleAdvection, self)._local_buffer_allocations()
# else:
# nbC = self.fields_on_grid[0].nbComponents
# loc_arrays = [self.v_resol_dir[0]]
# loc_type = [PARMES_REAL]
# loc_arrays += [self.resol_dir[0]] * (nbC)
# loc_type += [PARMES_REAL] * (nbC)
# loc_arrays += [self._wi_nb_reduce * 12, 5, 5]
# loc_type += [PARMES_INTEGER, PARMES_REAL, PARMES_REAL]
# self._num_locMem, self.size_local_alloc = \
# self.cl_env.LocalMemAllocator(loc_arrays, type_list=loc_type)
# self._loc_velo = self._num_locMem[0]
# self._loc_scal = self._num_locMem[1:nbC + 1]
# self._loc_minmax = self._num_locMem[nbC + 1]
# self._loc_buff_l = self._num_locMem[nbC + 2]
# self._loc_buff_r = self._num_locMem[nbC + 3]
def
_comm_buffer_reallocation
(
self
,
shape
,
buff
,
cl_buff
,
cl_buff_loc
):
"""
Reallocate the buffer according to a shape
"""
self
.
cl_env
.
global_deallocation
(
cl_buff
)
...
...
@@ -655,40 +624,25 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
self
.
_other_tmp_buffer_l
[...]
=
0.
self
.
_other_tmp_buffer_r
[...]
=
0.
if
self
.
is_multipass
:
evt_num_remesh_out
=
self
.
num_remesh_out
(
self
.
part_position
[
0
],
self
.
fields_on_part
[
self
.
fields_on_grid
[
0
]][
0
],
self
.
_cl_buffer_l
,
self
.
_cl_buffer_r
,
self
.
_cl_mesh_info
)
#print (evt_num_remesh_in.profile.end - evt_num_remesh_in.profile.start) * 1e-9, (evt_num_remesh_out.profile.end - evt_num_remesh_out.profile.start) * 1e-9
else
:
evt_num_remesh_in
=
self
.
num_remesh
(
self
.
part_position
[
0
],
self
.
fields_on_part
[
self
.
fields_on_grid
[
0
]][
0
],
self
.
fields_on_grid
[
0
].
gpu_data
[
0
],
self
.
_cl_buffer_l
,
self
.
_cl_buffer_r
,
self
.
_cl_mesh_info
)
evt_num_remesh_out
=
evt_num_remesh_in
# evt_num_remesh.wait()
# print (evt_num_remesh.profile.end - evt_num_remesh.profile.start) * 1e-9
evt_num_remesh
=
self
.
num_remesh
(
self
.
part_position
[
0
],
self
.
fields_on_part
[
self
.
fields_on_grid
[
0
]][
0
],
self
.
fields_on_grid
[
0
].
gpu_data
[
0
],
self
.
_cl_buffer_l
,
self
.
_cl_buffer_r
,
self
.
_cl_mesh_info
)
# Get buffers values
evt_get_l
=
cl
.
enqueue_copy
(
self
.
cl_env
.
queue
,
self
.
_buffer_l
,
self
.
_cl_buffer_l
,
wait_for
=
[
evt_num_remesh
_out
])
wait_for
=
[
evt_num_remesh
])
evt_get_r
=
cl
.
enqueue_copy
(
self
.
cl_env
.
queue
,
self
.
_buffer_r
,
self
.
_cl_buffer_r
,
wait_for
=
[
evt_num_remesh
_out
])
wait_for
=
[
evt_num_remesh
])
if
CL_PROFILE
:
evt_get_l
.
wait
()
evt_get_r
.
wait
()
# Ensure that the previous OpenCL transfers are finished
evt_get_l
.
wait
()
evt_get_r
.
wait
()
# Send/receieve the _l and _r buffers
ctime
=
MPI
.
Wtime
()
l_recv
=
self
.
_comm
.
Irecv
(
...
...
@@ -697,6 +651,9 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
r_recv
=
self
.
_comm
.
Irecv
(
[
self
.
_other_tmp_buffer_r
,
self
.
_buffer_size
,
PARMES_MPI_REAL
],
source
=
self
.
_L_rk
,
tag
=
888
+
self
.
_L_rk
)
# Ensure that the previous OpenCL transfers are finished
evt_get_l
.
wait
()
evt_get_r
.
wait
()
l_send
=
self
.
_comm
.
Issend
(
[
self
.
_buffer_l
,
self
.
_buffer_size
,
PARMES_MPI_REAL
],
dest
=
self
.
_L_rk
,
tag
=
333
+
self
.
_comm_rank
)
...
...
@@ -710,13 +667,6 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
r_recv
.
wait
()
rmsh_mpi_time
=
MPI
.
Wtime
()
-
ctime
if
self
.
is_multipass
:
evt_num_remesh_in
=
self
.
num_remesh_in
(
self
.
part_position
[
0
],
self
.
fields_on_part
[
self
.
fields_on_grid
[
0
]][
0
],
self
.
fields_on_grid
[
0
].
gpu_data
[
0
],
self
.
_cl_mesh_info
)
evt_get_other_l
=
cl
.
enqueue_copy
(
self
.
cl_env
.
queue
,
self
.
_other_buffer_l
,
self
.
fields_on_grid
[
0
].
gpu_data
[
0
],
...
...
@@ -727,7 +677,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
buffer_pitches
=
self
.
_pitches_dev
,
host_pitches
=
self
.
_pitches_host
,
region
=
self
.
_buffer_region
,
wait_for
=
[
evt_num_remesh
_in
])
wait_for
=
[
evt_num_remesh
])
# Get the initial values for recieve other contributions into
evt_get_other_r
=
cl
.
enqueue_copy
(
self
.
cl_env
.
queue
,
self
.
_other_buffer_r
,
...
...
@@ -737,7 +687,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
buffer_pitches
=
self
.
_pitches_dev
,
host_pitches
=
self
.
_pitches_host
,
region
=
self
.
_buffer_region
,
wait_for
=
[
evt_num_remesh
_in
])
wait_for
=
[
evt_num_remesh
])
l_send
.
wait
()
r_send
.
wait
()
...
...
@@ -791,8 +741,6 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
if
self
.
_is_cut_dir
:
for
k
in
[
self
.
num_reduce_stage1_advec
,
#self.num_reduce_stage1_rmsh,
self
.
num_remesh_in
,
self
.
num_remesh_out
,
self
.
num_reduce_stage2
]:
if
k
is
not
None
:
for
p
in
k
.
profile
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment