Skip to content
Snippets Groups Projects
Commit 8a85a3e1 authored by Keck Jean-Baptiste
Browse files

debug intel

parent bf57ee6b
No related branches found
No related tags found
No related merge requests found
...@@ -111,8 +111,9 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): ...@@ -111,8 +111,9 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator):
args['width'] = CodegenVariable('width', itype, typegen, add_impl_const=True,nl=True) args['width'] = CodegenVariable('width', itype, typegen, add_impl_const=True,nl=True)
else: else:
args['offset'] = CodegenVariable('offset', itype, typegen, add_impl_const=True,nl=True) args['offset'] = CodegenVariable('offset', itype, typegen, add_impl_const=True,nl=True)
args['lidx'] = CodegenVariable('lidx', itype, typegen, add_impl_const=True) args['lidx'] = CodegenVariable('lidx', itype, typegen, add_impl_const=True)
args['Lx'] = CodegenVariable('Lx', itype, typegen, add_impl_const=True, nl=True) args['Lx'] = CodegenVariable('Lx', itype, typegen, add_impl_const=True, nl=True)
args['active'] = CodegenVariable('active','bool',typegen)
basename = 'stretching_rhs_{}_{}{}{}_fdc{}'.format(sformulation.lower(),ftype[0],dim,('v' if vectorize_u else ''),order) basename = 'stretching_rhs_{}_{}{}{}_fdc{}'.format(sformulation.lower(),ftype[0],dim,('v' if vectorize_u else ''),order)
basename+='_'+XYZ[direction] basename+='_'+XYZ[direction]
...@@ -223,10 +224,12 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): ...@@ -223,10 +224,12 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator):
rk_step = s.args['rk_step'] rk_step = s.args['rk_step']
offset = s.args['offset'] offset = s.args['offset']
active = s.args['active']
dw_dt = CodegenVectorClBuiltin('dW_dt', ftype, dim, tg) dw_dt = CodegenVectorClBuiltin('dW_dt', ftype, dim, tg)
ghosts = CodegenVariable('ghosts', itype, tg, const=True, value=order/2, symbolic_mode=True) ghosts = CodegenVariable('ghosts', itype, tg, const=True, value=order/2, symbolic_mode=True)
with s._function_(): with s._function_():
s.jumpline() s.jumpline()
...@@ -236,7 +239,7 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): ...@@ -236,7 +239,7 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator):
s.jumpline() s.jumpline()
s.comment('Synchronize required vorticiy component across workitems') s.comment('Synchronize required vorticiy component across workitems')
s.barrier(_local=cached,_global=not cached) s.barrier(_local=cached,_global=not cached)
with s._block_(): with s._if_(active()):
code = '{} = {};'.format(Wd[offset()], W[direction]) code = '{} = {};'.format(Wd[offset()], W[direction])
s.append(code) s.append(code)
s.barrier(_local=cached,_global=not cached) s.barrier(_local=cached,_global=not cached)
...@@ -245,11 +248,11 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator): ...@@ -245,11 +248,11 @@ class DirectionalStretchingRhsFunction(OpenClFunctionCodeGenerator):
dw_dt.declare(s,init=0) dw_dt.declare(s,init=0)
if is_conservative: if is_conservative:
cond = '({lid}>={step}*{ghosts}) && ({lid}<{L}-{step}*{ghosts})'.format( cond = '({active}) && ({lid}>={step}*{ghosts}) && ({lid}<{L}-{step}*{ghosts})'.format(
lid=lidx(), L=Lx(), ghosts=ghosts(), step='({}+1)'.format(rk_step())) active=active(), lid=lidx(), L=Lx(), ghosts=ghosts(), step='({}+1)'.format(rk_step()))
else: else:
cond = '({lid}>={ghosts}) && ({lid}<{L}-{ghosts})'.format( cond = '({active}) && ({lid}>={ghosts}) && ({lid}<{L}-{ghosts})'.format(
lid=lidx(), L=Lx(), ghosts=ghosts()) active=active(), lid=lidx(), L=Lx(), ghosts=ghosts())
with s._if_(cond): with s._if_(cond):
......
...@@ -158,7 +158,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): ...@@ -158,7 +158,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator):
def required_workgroup_cache_size(self, local_work_size): def required_workgroup_cache_size(self, local_work_size):
dim = self.work_dim dim = self.work_dim
ftype = self.ftype ftype = self.ftype
is_cached = self.is_cached is_cached = self.is_cached
direction = self.direction direction = self.direction
cache_ghosts = self.cache_ghosts() cache_ghosts = self.cache_ghosts()
is_periodic = self.is_periodic is_periodic = self.is_periodic
...@@ -233,7 +233,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): ...@@ -233,7 +233,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator):
add_impl_const=True,nl=True) add_impl_const=True,nl=True)
_global = OpenClCodeGenerator.default_keywords['global'] _global = OpenClCodeGenerator.default_keywords['global']
_local = OpenClCodeGenerator.default_keywords['local'] _local = OpenClCodeGenerator.default_keywords['local']
for i in xrange(work_dim): for i in xrange(work_dim):
name = svorticity+xyz[i] name = svorticity+xyz[i]
kargs[name] = CodegenVariable(storage=_global,name=name,typegen=typegen, kargs[name] = CodegenVariable(storage=_global,name=name,typegen=typegen,
...@@ -313,7 +313,8 @@ class DirectionalStretchingKernel(KernelCodeGenerator): ...@@ -313,7 +313,8 @@ class DirectionalStretchingKernel(KernelCodeGenerator):
W = CodegenVectorClBuiltin('W',ftype,dim,tg) W = CodegenVectorClBuiltin('W',ftype,dim,tg)
U = CodegenVectorClBuiltin('U',ftype,dim,tg) U = CodegenVectorClBuiltin('U',ftype,dim,tg)
first = CodegenVariable('first','bool',tg,init='true') first = CodegenVariable('first','bool',tg,init='true')
active = CodegenVariable('active','bool',tg)
cache_ghosts = CodegenVariable('cache_ghosts','int',tg, cache_ghosts = CodegenVariable('cache_ghosts','int',tg,
const=True,value=self.cache_ghosts()) const=True,value=self.cache_ghosts())
...@@ -364,23 +365,34 @@ class DirectionalStretchingKernel(KernelCodeGenerator): ...@@ -364,23 +365,34 @@ class DirectionalStretchingKernel(KernelCodeGenerator):
try: try:
fval = local_id[0] if i==0 else global_id.fval(i) fval = local_id[0] if i==0 else global_id.fval(i)
gsize = local_work() if i==0 else global_size[i] gsize = local_work() if i==0 else global_size[i]
N = compute_grid_size[i]
ghosts = compute_grid_ghosts[i]
if i==0: if i==0:
N = '{}+2*{}'.format(N,cache_ghosts()) N = '(({}+2*{}+{lwork}-1)/{lwork})*{Lx}'.format(compute_grid_size[i],cache_ghosts(),lwork=local_work(), Lx=local_size[0])
ghosts = '({}-{})'.format(ghosts,cache_ghosts()) ghosts = '({}-{})'.format(compute_grid_ghosts[i],cache_ghosts())
else:
N = '{Sx}'.format(Sx=compute_grid_size[i])
ghosts = compute_grid_ghosts[i]
with s._for_('int {i}={fval}; {i}<{N}; {i}+={gsize}'.format( with s._for_('int {i}={fval}; {i}<{N}; {i}+={gsize}'.format(
i='kji'[i], fval=fval, gsize=gsize,N=N)) as ctx: i='kji'[i], fval=fval, gsize=gsize,N=N)) as ctx:
s.append('{} = {}+{};'.format(global_id[i], 'kji'[i], ghosts)) s.append('{} = {}+{};'.format(global_id[i], 'kji'[i], ghosts))
if i==1: if i==0:
first.declare(s) active.declare(s, init='(k < {}+2*{})'.format(
yield ctx compute_grid_size[0],cache_ghosts()))
elif i==1:
first.declare(s)
yield ctx
if i>0:
s.barrier(_local=True)
except: except:
raise raise
nested_loops = [_work_iterate_(i) for i in xrange(dim-1,-1,-1)] nested_loops = [_work_iterate_(i) for i in xrange(dim-1,-1,-1)]
# Context manager: opens an `s._if_` block whose condition is the expression
# produced by `active()` and yields inside it, so whatever the caller emits
# within the `with` body is placed under that condition.
# NOTE(review): `s` and `active` come from the enclosing scope; `active` appears
# to be the per-work-item bool declared in the innermost work loop -- confirm.
@contextmanager
def if_thread_active():
with s._if_('{}'.format(active())):
yield
with s._kernel_(): with s._kernel_():
s.jumpline() s.jumpline()
with s._align_() as al: with s._align_() as al:
...@@ -427,10 +439,10 @@ class DirectionalStretchingKernel(KernelCodeGenerator): ...@@ -427,10 +439,10 @@ class DirectionalStretchingKernel(KernelCodeGenerator):
with contextlib.nested(*nested_loops): with contextlib.nested(*nested_loops):
s.jumpline() s.jumpline()
init = compute_index(idx=global_id, size=grid_size) init = compute_index(idx=global_id, size=grid_size)
s.append('{} = {};'.format(global_index(), init)) s.append('{} = {};'.format(global_index(), init))
winit, uinit = '','' winit, uinit = '',''
for i in xrange(work_dim): for i in xrange(work_dim):
Wi = self.svorticity+self.xyz[i] Wi = self.svorticity+self.xyz[i]
...@@ -442,53 +454,67 @@ class DirectionalStretchingKernel(KernelCodeGenerator): ...@@ -442,53 +454,67 @@ class DirectionalStretchingKernel(KernelCodeGenerator):
s.jumpline() s.jumpline()
s.append('{} {},{};'.format(U.ctype,U(),W())) s.append('{} {},{};'.format(U.ctype,U(),W()))
with s._if_('{}'.format(first())): with if_thread_active():
s.append('{} = {};'.format(U(), uinit)) with s._if_('{}'.format(first())):
s.append('{} = {};'.format(W(), winit))
if is_periodic:
with s._if_('{lid} < 2*{ghosts}'.format(lid=local_id[0],ghosts=cache_ghosts())):
s.append('{} = {};'.format(Ul[local_id[0]], U()))
s.append('{} = {};'.format(Wl[local_id[0]], W()))
s.append('{} = false;'.format(first()))
with s._else_():
if is_periodic:
with s._if_('{} >= {}-{}'.format(global_id[0],compute_grid_size[0],cache_ghosts())):
_id = '{}-{}+{}'.format(global_id[0],compute_grid_size[0],cache_ghosts())
s.append('{} = {};'.format(U(), Ul[_id]))
s.append('{} = {};'.format(W(), Wl[_id]))
with s._elif_('{} < 2*{}'.format(local_id[0],cache_ghosts())):
s.append('{} = {};'.format(U(), Ur[local_id[0]]))
s.append('{} = {};'.format(W(), Wr[local_id[0]]))
else:
with s._if_('{} < 2*{}'.format(local_id[0],cache_ghosts())):
s.append('{} = {};'.format(U(), Ur[local_id[0]]))
s.append('{} = {};'.format(W(), Wr[local_id[0]]))
with s._else_():
s.append('{} = {};'.format(U(), uinit)) s.append('{} = {};'.format(U(), uinit))
s.append('{} = {};'.format(W(), winit)) s.append('{} = {};'.format(W(), winit))
if is_periodic:
with s._if_('{lid} < 2*{ghosts}'.format(lid=local_id[0],ghosts=cache_ghosts())):
s.append('{} = {};'.format(Ul[local_id[0]], U()))
s.append('{} = {};'.format(Wl[local_id[0]], W()))
s.append('{} = false;'.format(first()))
with s._else_():
if is_periodic:
with s._if_('{} >= {}-{}'.format(global_id[0],compute_grid_size[0],cache_ghosts())):
_id = '{}-{}+{}'.format(global_id[0],compute_grid_size[0],cache_ghosts())
s.append('{} = {};'.format(U(), Ul[_id]))
s.append('{} = {};'.format(W(), Wl[_id]))
with s._elif_('{} < 2*{}'.format(local_id[0],cache_ghosts())):
s.append('{} = {};'.format(U(), Ur[local_id[0]]))
s.append('{} = {};'.format(W(), Wr[local_id[0]]))
else:
with s._if_('{} < 2*{}'.format(local_id[0],cache_ghosts())):
s.append('{} = {};'.format(U(), Ur[local_id[0]]))
s.append('{} = {};'.format(W(), Wr[local_id[0]]))
with s._else_():
s.append('{} = {};'.format(U(), uinit))
s.append('{} = {};'.format(W(), winit))
s.barrier(_local=True) s.barrier(_local=True)
s.jumpline() s.jumpline()
if self.is_cached:
for i in xrange(work_dim):
Ui = self.svelocity+self.xyz[i]
Uic = cached_vars[Ui]
code = '{} = {};'.format(Uic[local_id[0]],U[i])
s.append(code)
s.jumpline()
with if_thread_active():
#cond='get_global_id(1)==0 && get_global_id(2)==0'
#with s._if_(cond):
#code = 'printf(\"READ: gid=(%i,%i,%i), lid.x=%i, GID=%i, U.x=%f, W.x=%f \\n\", gid.x,gid.y,gid.z,lid.x,GID,U.x,W.x);'
#s.append(code)
if self.is_cached:
for i in xrange(work_dim):
Ui = self.svelocity+self.xyz[i]
Uic = cached_vars[Ui]
code = '{} = {};'.format(Uic[local_id[0]],U[i])
s.append(code)
s.jumpline()
with s._if_('{} >= {}-2*{}'.format(local_id[0],local_size[0],cache_ghosts())):
_id = '{}-{}+2*{}'.format(local_id[0],local_size[0],cache_ghosts())
s.append('{} = {};'.format(Ur[_id], U()))
s.append('{} = {};'.format(Wr[_id], W()))
with s._if_('{} >= {}-2*{}'.format(local_id[0],local_size[0],cache_ghosts())):
_id = '{}-{}+2*{}'.format(local_id[0],local_size[0],cache_ghosts())
s.append('{} = {};'.format(Ur[_id], U()))
s.append('{} = {};'.format(Wr[_id], W()))
s.barrier(_local=True) s.barrier(_local=True)
s.jumpline()
rk_args={'dt': dt, rk_args={'dt': dt,
'inv_dx': inv_dx, 'inv_dx': inv_dx,
'W': W} 'W': W,
'active': active,
'Lx' : local_size[0],
'lidx' : local_id[0]}
if is_periodic and (not is_cached): if is_periodic and (not is_cached):
base = CodegenVariable('base','int',typegen=tg,const=True) base = CodegenVariable('base','int',typegen=tg,const=True)
base.declare(s,init='({}/{}) * {}'.format(global_index(),grid_size[0],grid_size[0])) base.declare(s,init='({}/{}) * {}'.format(global_index(),grid_size[0],grid_size[0]))
...@@ -499,8 +525,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator): ...@@ -499,8 +525,7 @@ class DirectionalStretchingKernel(KernelCodeGenerator):
rk_args['width'] = grid_size[0] rk_args['width'] = grid_size[0]
else: else:
rk_args['offset'] = local_id[0] if is_cached else global_index rk_args['offset'] = local_id[0] if is_cached else global_index
rk_args['Lx'] = local_size[0]
rk_args['lidx'] = local_id[0]
for i in xrange(work_dim): for i in xrange(work_dim):
Ui_name = self.svelocity+xyz[i] Ui_name = self.svelocity+xyz[i]
if is_cached: if is_cached:
...@@ -519,21 +544,28 @@ class DirectionalStretchingKernel(KernelCodeGenerator): ...@@ -519,21 +544,28 @@ class DirectionalStretchingKernel(KernelCodeGenerator):
code = '{} = {};'.format(W(), call) code = '{} = {};'.format(W(), call)
s.append(code) s.append(code)
s.jumpline() s.jumpline()
with if_thread_active():
if is_periodic: if is_periodic:
cond = '({lid}>={ghosts}) && ({lid}<{L}-{ghosts}) && ({gidx}<{size})'.format( cond = '({lid}>={ghosts}) && ({lid}<{L}-{ghosts}) && ({gidx}<{size})'.format(
lid=local_id[0], ghosts=cache_ghosts(), L=local_size[0], lid=local_id[0], ghosts=cache_ghosts(), L=local_size[0],
gidx=global_id[0], size=compute_grid_size[0]) gidx=global_id[0], size=compute_grid_size[0])
else: else:
cond = '({lid}>={ghosts}) && ({lid}<{L}-{ghosts})'.format( cond = '({lid}>={ghosts}) && ({lid}<{L}-{ghosts})'.format(
lid=local_id[0], ghosts=cache_ghosts(), L=local_size[0]) lid=local_id[0], ghosts=cache_ghosts(), L=local_size[0])
with s._if_(cond): with s._if_(cond):
for i in xrange(work_dim): #cond='get_global_id(1)==0 && get_global_id(2)==0'
Wi = self.svorticity+self.xyz[i] #with s._if_(cond):
Wi = s.vars[Wi] #code = 'printf(\"WRITE: gid=(%i,%i,%i), lid.x=%i, GID=%i, U.x=%f, W.x=%f, newWx=%f \\n\", gid.x,gid.y,gid.z,lid.x,GID,U.x,Wx[GID],W.x);'
code='{} = {};'.format(Wi[global_index()], W[i]) #s.append(code)
s.append(code) for i in xrange(work_dim):
Wi = self.svorticity+self.xyz[i]
Wi = s.vars[Wi]
code='{} = {};'.format(Wi[global_index()], W[i])
s.append(code)
#with s._elif_('get_global_id(1)==0 && get_global_id(2)==0'):
#code = 'printf(\"SKIP: gid=(%i,%i,%i), lid.x=%i, GID=%i, U.x=%f, W.x=%f, newWx=%f \\n\", gid.x,gid.y,gid.z,lid.x,GID,U.x,Wx[GID],W.x);'
#s.append(code)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -17,7 +17,7 @@ class TestDirectionalStretching(object): ...@@ -17,7 +17,7 @@ class TestDirectionalStretching(object):
queue = cl.CommandQueue(typegen.context) queue = cl.CommandQueue(typegen.context)
ctx = typegen.context ctx = typegen.context
grid_size = np.asarray([100,20,20]) grid_size = np.asarray([256,256,256])
compute_grid_ghosts = np.asarray([3*4,0,0]) compute_grid_ghosts = np.asarray([3*4,0,0])
compute_grid_size = grid_size + 2*compute_grid_ghosts compute_grid_size = grid_size + 2*compute_grid_ghosts
...@@ -108,8 +108,8 @@ class TestDirectionalStretching(object): ...@@ -108,8 +108,8 @@ class TestDirectionalStretching(object):
cls.host_buffers_gpu = host_buffers_gpu cls.host_buffers_gpu = host_buffers_gpu
cls.device_buffers = device_buffers cls.device_buffers = device_buffers
cls.local_work_size = np.asarray([42,1,1]) cls.local_work_size = np.asarray([30,1,1])
cls.work_load = np.asarray([1,1,1]) cls.work_load = np.asarray([1,4,4])
cls.inv_dx = inv_dx cls.inv_dx = inv_dx
cls.dt = dtype(0.5) cls.dt = dtype(0.5)
...@@ -314,24 +314,23 @@ class TestDirectionalStretching(object): ...@@ -314,24 +314,23 @@ class TestDirectionalStretching(object):
if boundary == BoundaryCondition.PERIODIC: if boundary == BoundaryCondition.PERIODIC:
target = 'no_ghosts' target = 'no_ghosts'
mesh_info = self.grid_mesh_info mesh_info = self.grid_mesh_info
view = [slice(0,grid_shape[0]), view = [slice(0,grid_size[2]),
slice(0,grid_shape[1]), slice(0,grid_size[1]),
slice(0,grid_shape[2])] slice(0,grid_size[0])]
elif boundary == BoundaryCondition.NONE: elif boundary == BoundaryCondition.NONE:
target = 'with_ghosts' target = 'with_ghosts'
mesh_info = self.compute_grid_mesh_info mesh_info = self.compute_grid_mesh_info
view = [slice(ghosts[2],grid_shape[0]+ghosts[2]), view = [slice(ghosts[2],grid_size[2]+ghosts[2]),
slice(ghosts[1],grid_shape[1]+ghosts[1]), slice(ghosts[1],grid_size[1]+ghosts[1]),
slice(ghosts[0],grid_shape[2]+ghosts[0])] slice(ghosts[0],grid_size[0]+ghosts[0])]
else: else:
raise ValueError() raise ValueError()
known_vars = { known_vars = {
'local_size': local_work_size, 'local_size': local_work_size,
'mesh_info': mesh_info 'mesh_info': mesh_info
} }
host_init_buffers = self.host_buffers_init[target] host_init_buffers = self.host_buffers_init[target]
host_buffers_reference = self.host_buffers_reference[target] host_buffers_reference = self.host_buffers_reference[target]
host_buffers_gpu = self.host_buffers_gpu[target] host_buffers_gpu = self.host_buffers_gpu[target]
...@@ -401,7 +400,7 @@ class TestDirectionalStretching(object): ...@@ -401,7 +400,7 @@ class TestDirectionalStretching(object):
for (name,host,dev) in buffers: for (name,host,dev) in buffers:
(l1,l2,linf) = self._distances(host,dev,view) (l1,l2,linf) = self._distances(host,dev,view)
print '\t{} -> l1={} l2={} linf={}'.format(name,l1,l2,linf) print '\t{} -> l1={} l2={} linf={}'.format(name,l1,l2,linf)
if linf>1e-8: if l2>1e-11:
err_buffers.append(name) err_buffers.append(name)
good = False good = False
if not good: if not good:
...@@ -447,7 +446,7 @@ class TestDirectionalStretching(object): ...@@ -447,7 +446,7 @@ class TestDirectionalStretching(object):
assert isinstance(rk_scheme, ExplicitRungeKutta) assert isinstance(rk_scheme, ExplicitRungeKutta)
cached=[False,True] cached=[False,True]
boundaries=[BoundaryCondition.PERIODIC,BoundaryCondition.NONE] boundaries=[BoundaryCondition.NONE, BoundaryCondition.PERIODIC]
directions=[0,1,2] directions=[0,1,2]
orders=[2,4,6] orders=[2,4,6]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment