Patch notes (text before the first "diff --git" header is skipped by git apply):
  * examples/advection_gpu.py: raises the field resolution to 257 per axis and
    switches the autotuner flag from MEASURE to PATIENT. No hunk touches the
    stretching 'order' entry, so it keeps its pre-image value (64); a hunk
    that would only insert unresolved merge-conflict markers around it
    (64 vs 16) must not be part of this patch.
  * hysop/codegen/base/statistics.py: renames
    TimedWorkStatistics.achieved_mem_throughput() to total_mem_throughput().
    NOTE(review): callers of the old name are not visible here - confirm none remain.
  * hysop/tools/callback.py: appends ' (tot=...)' to the throughput string when
    the global memory throughput is lower than the total memory throughput.
  * NOTE(review): indentation of context lines was reconstructed; apply with
    --ignore-whitespace if the context does not match exactly.

diff --git a/examples/advection_gpu.py b/examples/advection_gpu.py
index 119cefb8b33098c02bd79b5a8c11d246f6c6c1a9..67c89ece3c89e341fcf86e936d9e073d9986a9ec 100644
--- a/examples/advection_gpu.py
+++ b/examples/advection_gpu.py
@@ -27,7 +27,7 @@ if __name__=='__main__':
     GHOSTS = 0
     NSCALARS = 0
 
-    f_resolution = (129,129,129)[:DIM]
+    f_resolution = (257,257,257)[:DIM]
     v_resolution = f_resolution
     #v_resolution = (33,33,33)[:DIM]
     ghosts = (GHOSTS,)*DIM
@@ -128,7 +128,7 @@ if __name__=='__main__':
     else:
         print 'OK'
 
-    autotuner_config = AutotunerConfig(autotuner_flag=AutotunerFlags.MEASURE, prune_threshold=1.20, override_cache=False)
+    autotuner_config = AutotunerConfig(autotuner_flag=AutotunerFlags.PATIENT, prune_threshold=1.20, override_cache=False)
 
     method = {
         Backend: Backend.OPENCL,
diff --git a/hysop/codegen/base/statistics.py b/hysop/codegen/base/statistics.py
index ed767cc0b1bb132b6c870db35844fe379cc92abc..1435926fb8711f8d1d477be4df31aff42ea58222 100644
--- a/hysop/codegen/base/statistics.py
+++ b/hysop/codegen/base/statistics.py
@@ -143,7 +143,7 @@ class TimedWorkStatistics(WorkStatistics):
         return self.global_mem_transactions()/self.duration
     def local_mem_throughput(self):
         return self.local_mem_transactions()/self.duration
-    def achieved_mem_throughput(self):
+    def total_mem_throughput(self):
         return self.total_mem_transactions()/self.duration
 
     def _init(self):
diff --git a/hysop/tools/callback.py b/hysop/tools/callback.py
index 740fc635f58124b5c06d41fbde2616fa3570ca33..b8c7478dcc4218eb268fa415bcb9f48fabe9ae66 100644
--- a/hysop/tools/callback.py
+++ b/hysop/tools/callback.py
@@ -152,5 +152,7 @@ class ComputeInterface(MemInterface):
         if (timed_stats is not None):
             if timed_stats.global_mem_throughput()>0:
                 s += ' throughput={}'.format(bdw2str(timed_stats.global_mem_throughput()))
+                if timed_stats.global_mem_throughput() < timed_stats.total_mem_throughput():
+                    s+= ' (tot={})'.format(bdw2str(timed_stats.total_mem_throughput()))
             for (op_category, ops_per_second) in timed_stats.ops_per_second().iteritems():
                 s += ' {}'.format(unit2str(ops_per_second,op_category,decimal=True,rounded=0))