Commit 64d8c30e authored by Jean-Matthieu Etancelin

improve Kernel benchmark tool.

parent c4b19f5f
@@ -7,12 +7,186 @@ import pyopencl as cl
import numpy as np
import pylab as pl
import signal
import pickle
class BenchmarkSuite:
"""Benchark suite management"""
def __init__(self, sizes, kernel_name,
versions, configs, test=False, true_res=None, arg_to_test=0, inputs={}, file_name="Benchmarks_data"):
"""
Creates a benchmak suite, that consists in a list of Benchmark.
@param sizes : list of different problem sizes to benchmark.
@param kernel_name : name of the kernel to benchmark.
@param versions : list of tuples containing kernel versions (kernel sources, kernel OpenCL name ).
@param configs : dictionary of configurations.
keys are kernel OpenCL name,
values are tuples containing (compilation flags function, kernel arguments settings, config name, condition related to problem size)
@param test : by default no results tests are performed
@param true_res : function to compute true results
@param arg_to_test : index of kernel arguments that contains result
@param input : input data
@param file_name : name of file to store results in
On creation, data are loaded from a serialized version of timings in the file represented by file_name parameter.
If no such file, a new database is created.
"""
self.pickle_file_name = file_name
self.sizes = sizes
self.versions = versions
self.configs = configs
self.inputs = inputs
self.test = test
self.compute_true_res = true_res
self.arg_to_test = arg_to_test
self.kernel_name = kernel_name
if not self.test:
try:
                print 'Loading from pickled file ...',
self.timings = pickle.load(open(self.pickle_file_name + '.pickle', 'r'))
print 'Done.'
except IOError:
print 'No such file : ', self.pickle_file_name + '.pickle'
print 'start new database'
self.timings = {}
else:
self.timings = {}
self.complete_timings()
    def complete_timings(self):
        """
        Manage the dictionary structure of timings:
        timings[kernel_name][version_name][config_name][size] = time.
        Add any new keys to the nested dictionaries.
        """
if self.kernel_name not in self.timings.keys():
self.timings[self.kernel_name] = {}
for v in self.versions:
if not v[1] in self.timings[self.kernel_name].keys():
self.timings[self.kernel_name][v[1]] = {}
for c in self.configs[v[1]]:
if not c[2] in self.timings[self.kernel_name][v[1]].keys():
self.timings[self.kernel_name][v[1]][c[2]] = {}
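    # Resulting nesting after complete_timings(), e.g. (hypothetical
    # kernel/version/config names and size tuple):
    #   self.timings["advec"]["advec_vec"]["vec4"][(256, 256)] = 1.2e-03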
    def launch(self):
        """
        Performs the benchmark for all kernel versions and all configs.
        If the test flag is set to True, results are compared to the true result and timings are not saved.
        Otherwise, timings are added to the timings dictionary and then serialized to a file. A text version is also written.
        """
if self.test:
self.true_res = {}
self.compute_true_res(self.sizes, self.true_res, self.inputs)
for v in self.versions:
for conf in self.configs[v[1]]:
try:
allowed_size = conf[3]
except IndexError:
allowed_size = None
if callable(conf[0]):
b = Benchmark(v[0], v[1], self.sizes,
lambda s: "-D WIDTH=" + str(s[0]) + conf[0](s),
inputs=self.inputs, allowed_size=allowed_size)
else:
b = Benchmark(v[0], v[1], self.sizes,
lambda s: "-D WIDTH=" + str(s[0]) + conf[0],
inputs=self.inputs, allowed_size=allowed_size)
b.kernelSetup = conf[1]
if self.test:
b.test(self.true_res, self.arg_to_test)
else:
b.launch()
                    for t in b.timings.items():
                        self.timings[self.kernel_name][v[1]][conf[2]][t[0]] = t[1]
if not self.test:
pickle.dump(self.timings, open(self.pickle_file_name + '.pickle', 'w'), 0)
self.write_file()
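    # Typical driver code (a sketch; sizes, versions and configs are assumed
    # to be defined as illustrated above):
    #   suite = BenchmarkSuite(sizes, "advec", versions, configs)
    #   suite.launch()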
    def write_file(self):
        """
        Write a text version of the database.
        Two outputs are created:
        @li full : kernel versions and configs are given in columns and sizes in rows.
        @li hist : all data are given in rows to enable gnuplot to plot histograms.
        """
f = open(self.pickle_file_name + '_full.dat', 'w')
        # Build the sets of sizes and configs.
sizes_set = set()
config_set = set()
cols_lists = {}
for k in self.timings.keys():
for v in self.timings[k].keys():
cols_lists[v] = []
for c in self.timings[k][v].keys():
for s in self.timings[k][v][c]:
sizes_set.add(s)
config_set.add(c)
f.write("size dim ")
i = 0
for k in sorted(self.timings.keys()):
for v in sorted(self.timings[k].keys()):
for c in sorted(self.timings[k][v].keys()):
f.write(v + '_' + c + ' ')
cols_lists[v].append(i)
i += 1
f.write("\n")
for s in sorted(sizes_set):
f.write(str(s[0]) + " " + str(len(s)) + " ")
for k in sorted(self.timings.keys()):
for v in sorted(self.timings[k].keys()):
for c in sorted(self.timings[k][v].keys()):
try:
f.write(str(self.timings[k][v][c][s]) + " ")
                        except KeyError as ke:
                            # No timing recorded for this size: placeholder.
                            if ke.args[0] == s:
                                f.write("- ")
                            else:
                                raise
f.write("\n")
for k in sorted(self.timings.keys()):
for v in sorted(self.timings[k].keys()):
f.write('#' + v + '=')
for i in cols_lists[v]:
f.write(str(i) + ' ')
f.write('\n')
f.close()
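        # The "_hist.dat" file written below holds one record per row:
        #   size dim kernel version config time
        # e.g. (hypothetical values): 256 2 advec advec_vec vec4 0.0009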
f = open(self.pickle_file_name + '_hist.dat', 'w')
f.write("#kernel_nb=" + str(len(self.timings.keys())) + "\n")
f.write("#kernel_names=")
for k in sorted(self.timings.keys()):
f.write(k + " ")
f.write("\n")
f.write("#version_nb=")
for k in sorted(self.timings.keys()):
f.write(str(len(self.timings[k].keys())) + " ")
f.write("\n")
f.write("#config_nb=" + str(len(config_set)) + "\n")
for i, s in enumerate(sorted(sizes_set)):
f.write("#Block_{0}_{1}={2}\n".format(s[0], len(s), i))
for s in sorted(sizes_set):
for c in sorted(config_set):
for k in sorted(self.timings.keys()):
for v in sorted(self.timings[k].keys()):
f.write(str(s[0]) + " " + str(len(s)) + " ")
f.write(k + " ")
f.write(v + " ")
f.write(c + " ")
                        try:
                            f.write(str(self.timings[k][v][c][s]) + "\n")
                        except KeyError:
                            f.write('-\n')
f.write("\n")
f.close()
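# Illustrative excerpt of the "_full.dat" output (hypothetical values):
#   size dim advec_basic advec_vec_vec4
#   256 2 0.0015 0.0009
#   512 2 0.0061 0.0038
# Missing (size, config) entries are written as "-".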
class Benchmark:
"""Benchmark management"""
    def __init__(self, code, kernel_name, sizes, build_opt, nb_run=20,
                 inputs=None, allowed_size=None):
"""
        Creates a benchmark for a given kernel source code over different problem sizes.
@@ -21,6 +195,8 @@ class Benchmark:
@param sizes : list of different problem sizes to launch kernel
@param build_opt : OpenCL compiler options
@param nb_run : number of launches to average time (default = 20)
@param inputs : input data
        @param allowed_size : boolean function telling whether a given problem size should be benchmarked (depends on the configuration)
"""
#Get platform.
try:
@@ -65,10 +241,16 @@ class Benchmark:
self.nb_run = nb_run
        ## Problem sizes
        self.sizes = sizes
        ## Problem inputs
        self.inputs = inputs
        ## Function to test whether a size is allowed
        self.is_size_allowed = allowed_size
        if self.code is not None:
            for size in self.sizes:
                if self.is_size_allowed is None or self.is_size_allowed(size):
                    # Build one program per problem size, since the build
                    # options (e.g. -D WIDTH=...) depend on the size.
                    prg = cl.Program(self.ctx, self.code)
                    prg.build(" -cl-single-precision-constant -cl-opt-disable " + build_opt(size))
                    self.prg[size] = prg
        ## Function to set up kernel arguments
self.kernelSetup = None
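        # The kernelSetup function is expected to map (size, inputs) to the
        # kernel argument list, e.g. (hypothetical sketch):
        #   def setup(size, inputs):
        #       return [size, None, inputs['scalar'],
        #               np.empty(size, dtype=np.float32)]
        # i.e. global work size, local work size (None lets OpenCL choose),
        # then the kernel's buffer/scalar arguments.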
@@ -81,30 +263,38 @@ class Benchmark:
"""
print "Testing : "
for size in self.sizes:
            if self.is_size_allowed is None or self.is_size_allowed(size):
                kernel = getattr(self.prg[size], self.kernel)
                kernelArgs = self.kernelSetup(size, self.inputs)
                res = np.empty_like(kernelArgs[ind_res])
                for i in xrange(len(kernelArgs)):
                    # Copy multi-dimensional arrays to device buffers; work
                    # sizes and scalars are passed through unchanged.
                    if isinstance(kernelArgs[i], np.ndarray) and len(kernelArgs[i].shape) > 1:
                        buff = cl.Buffer(self.ctx, cl.mem_flags.READ_WRITE, size=kernelArgs[i].nbytes)
                        cl.enqueue_copy(self.queue, buff, kernelArgs[i])
                        kernelArgs[i] = buff
                self.queue.finish()
                print kernelArgs[0:2]
                kernel(self.queue, *tuple(kernelArgs))
                self.queue.finish()
                cl.enqueue_copy(self.queue, res, kernelArgs[ind_res])
                self.queue.finish()
                print size
                try:
                    # Crop padded buffers back to the actual problem size.
                    if len(res.shape) == 3:
                        res = res[:size[0], :size[1], :size[2]]
                    else:
                        res = res[:size[0], :size[1]]
                    np.testing.assert_array_almost_equal(res, true_res[size], decimal=6)
                    print 'Ok'
                except AssertionError as ae:
                    print res[np.where(np.abs(res - true_res[size]) >= 1e-6)].shape, "bad elements ~ 1e-6"
                    print res[np.where(np.abs(res - true_res[size]) >= 1e-5)].shape, "bad elements ~ 1e-5"
                    print res[np.where(np.abs(res - true_res[size]) >= 1e-4)].shape, "bad elements ~ 1e-4"
                    print res[np.where(np.abs(res - true_res[size]) >= 1e-3)].shape, "bad elements ~ 1e-3"
                    if res[np.where(np.abs(res - true_res[size]) >= 1e-3)].shape[0] > 0:
                        print res[np.where(np.abs(res - true_res[size]) >= 1e-3)]
                        print true_res[size][np.where(np.abs(res - true_res[size]) >= 1e-3)]
                        raise ae
def launch(self):
"""
@@ -114,58 +304,29 @@ class Benchmark:
"""
print "\nRunning : "
for size in self.sizes:
            if self.is_size_allowed is None or self.is_size_allowed(size):
                print size,
                kernel = getattr(self.prg[size], self.kernel)
                if self.kernelSetup is not None:
                    kernelArgs = self.kernelSetup(size, self.inputs)
                for i in xrange(len(kernelArgs)):
                    if isinstance(kernelArgs[i], np.ndarray) and len(kernelArgs[i].shape) > 1:
                        buff = cl.Buffer(self.ctx, cl.mem_flags.READ_WRITE, size=kernelArgs[i].nbytes)
                        cl.enqueue_copy(self.queue, buff, kernelArgs[i])
                        kernelArgs[i] = buff
                self.queue.finish()
                print kernelArgs[0:2]
                # Warm-up launch, not timed.
                evt = kernel(self.queue, *tuple(kernelArgs))
                self.queue.finish()
                evts = []
                for i in xrange(self.nb_run):
                    evt = kernel(self.queue, *tuple(kernelArgs))
                    evts.append(evt)
                self.queue.finish()
                # Event profiling timestamps are in nanoseconds.
                time = 0.
                for evt in evts:
                    time += (evt.profile.end - evt.profile.start) * 1e-9
                self.timings[size] = time / self.nb_run
                self.kernel_args[size] = kernelArgs[0:2]
                print self.timings[size], "args : ", kernelArgs[0:2]

    def show_results(self):
        """Display timings in a plot."""
        pl.figure(1)
        pl.plot([size[0] for size in self.sizes],
                [self.timings[size] / (size[0] ** len(size)) for size in self.sizes],
                linewidth=1.0)
        pl.xlabel('problem sizes')
        pl.ylabel('time per particle')
        pl.grid(True)
        signal.signal(signal.SIGINT, signal.SIG_DFL)
        pl.show()

    def toFile(self, filename, header=None):
        """
        Save timings into a file.
        @param filename : file name
        @param header : optional first line of the file
        """
        f = open(filename, 'w')
        if header is not None:
            f.write(header + "\n")
        f.write("#size dim nPart time kernel_wg \n")
        for size in self.sizes:
            f.write("{0}\t {1}\t {2}\t {3}\t".format(size[0],
                                                     len(size),
                                                     size[0] ** len(size),
                                                     self.timings[size]))
            f.write(str(self.kernel_args[size]))
            f.write("\n")
        f.close()
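
# A minimal, self-contained usage sketch (not part of the original file).
# The copy kernel, sizes and setup function below are hypothetical; they
# only illustrate the calling conventions assumed above: a kernel argument
# list starts with (global work size, local work size), followed by buffers.
if __name__ == '__main__':
    src = """
    __kernel void copy(__global const float* in, __global float* out) {
        int gid = get_global_id(0) + WIDTH * get_global_id(1);
        out[gid] = in[gid];
    }
    """

    def setup_copy(size, inputs):
        data = np.asarray(np.random.random_sample(size), dtype=np.float32, order='F')
        return [size, None, data, np.empty_like(data)]

    b = Benchmark(src, 'copy', [(256, 256), (512, 512)],
                  lambda s: "-D WIDTH=" + str(s[0]))
    b.kernelSetup = setup_copy
    b.launch()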