From 64d8c30ee3af42f7e6f8dc1dc89a72e34ab8be29 Mon Sep 17 00:00:00 2001
From: Jean-Matthieu Etancelin <jean-matthieu.etancelin@imag.fr>
Date: Fri, 25 Jan 2013 15:46:38 +0000
Subject: [PATCH] Improve kernel benchmark tool.

---
 HySoP/hysop/tools/kernel_benchmark.py | 323 +++++++++++++++++++-------
 1 file changed, 242 insertions(+), 81 deletions(-)

diff --git a/HySoP/hysop/tools/kernel_benchmark.py b/HySoP/hysop/tools/kernel_benchmark.py
index caafb677a..c989ce4a2 100644
--- a/HySoP/hysop/tools/kernel_benchmark.py
+++ b/HySoP/hysop/tools/kernel_benchmark.py
@@ -7,12 +7,186 @@ import pyopencl as cl
 import numpy as np
 import pylab as pl
 import signal
+import pickle
+
+
+class BenchmarkSuite:
+    """Benchmark suite management"""
+
+    def __init__(self, sizes, kernel_name, versions, configs, test=False,
+                 true_res=None, arg_to_test=0, inputs={},
+                 file_name="Benchmarks_data"):
+        """
+        Creates a benchmark suite, which consists of a list of Benchmark objects.
+
+        @param sizes : list of the problem sizes to benchmark.
+        @param kernel_name : name of the kernel to benchmark.
+        @param versions : list of kernel version tuples (kernel sources, kernel OpenCL name).
+        @param configs : dictionary of configurations;
+            keys are kernel OpenCL names,
+            values are lists of tuples (compilation flags or flags function,
+            kernel argument setup function, config name, optional condition on problem size).
+        @param test : if True, results are checked against the true result instead of being timed (default False).
+        @param true_res : function that computes the true results.
+        @param arg_to_test : index of the kernel argument that contains the result.
+        @param inputs : input data.
+        @param file_name : name of the file to store results in.
+
+        On creation, timings are loaded from a pickled file named after the
+        file_name parameter. If no such file exists, a new database is created.
+        """
+        self.pickle_file_name = file_name
+        self.sizes = sizes
+        self.versions = versions
+        self.configs = configs
+        self.inputs = inputs
+        self.test = test
+        self.compute_true_res = true_res
+        self.arg_to_test = arg_to_test
+        self.kernel_name = kernel_name
+        if not self.test:
+            try:
+                print 'Loading from pickled file ...',
+                self.timings = pickle.load(open(self.pickle_file_name + '.pickle', 'r'))
+                print 'Done.'
+            except IOError:
+                print 'No such file : ', self.pickle_file_name + '.pickle'
+                print 'Starting a new database'
+                self.timings = {}
+        else:
+            self.timings = {}
+        self.complete_timings()
+
+    def complete_timings(self):
+        """
+        Completes the dictionary structure of timings.
+
+        Adds any new kernel, version and config keys.
+        """
+        if self.kernel_name not in self.timings:
+            self.timings[self.kernel_name] = {}
+        for v in self.versions:
+            if v[1] not in self.timings[self.kernel_name]:
+                self.timings[self.kernel_name][v[1]] = {}
+            for c in self.configs[v[1]]:
+                if c[2] not in self.timings[self.kernel_name][v[1]]:
+                    self.timings[self.kernel_name][v[1]][c[2]] = {}
+
+    def launch(self):
+        """
+        Performs the benchmark for all kernel versions and all configs.
+
+        If the test flag is set to True, results are compared to the true
+        result and timings are not saved. Otherwise, timings are added to the
+        timings dictionary and then serialized to a file. A text version is also written.
+ """ + if self.test: + self.true_res = {} + self.compute_true_res(self.sizes, self.true_res, self.inputs) + for v in self.versions: + for conf in self.configs[v[1]]: + try: + allowed_size = conf[3] + except IndexError: + allowed_size = None + if callable(conf[0]): + b = Benchmark(v[0], v[1], self.sizes, + lambda s: "-D WIDTH=" + str(s[0]) + conf[0](s), + inputs=self.inputs, allowed_size=allowed_size) + else: + b = Benchmark(v[0], v[1], self.sizes, + lambda s: "-D WIDTH=" + str(s[0]) + conf[0], + inputs=self.inputs, allowed_size=allowed_size) + b.kernelSetup = conf[1] + if self.test: + b.test(self.true_res, self.arg_to_test) + else: + b.launch() + [self.timings[self.kernel_name][v[1]][conf[2]].__setitem__( + t[0], t[1]) for t in b.timings.items()] + if not self.test: + pickle.dump(self.timings, open(self.pickle_file_name + '.pickle', 'w'), 0) + self.write_file() + + def write_file(self): + """ + Write a text version of database. + + Two outputs are created : + @li full : kernels versions and configs are given in columns and sizes in rows. + @li hist : all data is given in rows to enable gnuplot to plot histograms. + """ + f = open(self.pickle_file_name + '_full.dat', 'w') + #build size set + sizes_set = set() + config_set = set() + cols_lists = {} + for k in self.timings.keys(): + for v in self.timings[k].keys(): + cols_lists[v] = [] + for c in self.timings[k][v].keys(): + for s in self.timings[k][v][c]: + sizes_set.add(s) + config_set.add(c) + f.write("size dim ") + i = 0 + for k in sorted(self.timings.keys()): + for v in sorted(self.timings[k].keys()): + for c in sorted(self.timings[k][v].keys()): + f.write(v + '_' + c + ' ') + cols_lists[v].append(i) + i += 1 + f.write("\n") + for s in sorted(sizes_set): + f.write(str(s[0]) + " " + str(len(s)) + " ") + for k in sorted(self.timings.keys()): + for v in sorted(self.timings[k].keys()): + for c in sorted(self.timings[k][v].keys()): + try: + f.write(str(self.timings[k][v][c][s]) + " ") + except KeyError as ke: + if ke.message is s: + f.write("- ") + else: + raise ke + f.write("\n") + for k in sorted(self.timings.keys()): + for v in sorted(self.timings[k].keys()): + f.write('#' + v + '=') + for i in cols_lists[v]: + f.write(str(i) + ' ') + f.write('\n') + f.close() + f = open(self.pickle_file_name + '_hist.dat', 'w') + f.write("#kernel_nb=" + str(len(self.timings.keys())) + "\n") + f.write("#kernel_names=") + for k in sorted(self.timings.keys()): + f.write(k + " ") + f.write("\n") + f.write("#version_nb=") + for k in sorted(self.timings.keys()): + f.write(str(len(self.timings[k].keys())) + " ") + f.write("\n") + f.write("#config_nb=" + str(len(config_set)) + "\n") + for i, s in enumerate(sorted(sizes_set)): + f.write("#Block_{0}_{1}={2}\n".format(s[0], len(s), i)) + for s in sorted(sizes_set): + for c in sorted(config_set): + for k in sorted(self.timings.keys()): + for v in sorted(self.timings[k].keys()): + f.write(str(s[0]) + " " + str(len(s)) + " ") + f.write(k + " ") + f.write(v + " ") + f.write(c + " ") + try: + f.write(str(self.timings[k][v][c][s]) + "\n") + except: + f.write('-\n') + f.write("\n") + f.close() class Benchmark: """Benchmark management""" - def __init__(self, code, kernel_name, sizes, build_opt, nb_run=20): + def __init__(self, code, kernel_name, sizes, build_opt, nb_run=20, inputs=None, allowed_size=None): """ Creates a benchmark for a given source code, kernel for differnet problem sizes. 
@@ -21,6 +195,8 @@ class Benchmark:
         @param sizes : list of different problem sizes to launch the kernel on
         @param build_opt : OpenCL compiler options
         @param nb_run : number of launches to average time over (default = 20)
+        @param inputs : input data
+        @param allowed_size : boolean function that decides, from the problem size, whether the benchmark runs (configuration dependent)
         """
         #Get platform.
         try:
@@ -65,10 +241,16 @@ class Benchmark:
         self.nb_run = nb_run
         ## Problems sizes
         self.sizes = sizes
-        for size in self.sizes:
-            prg = cl.Program(self.ctx, self.code)
-            prg.build(" -cl-single-precision-constant -cl-opt-disable " + build_opt(size))
-            self.prg[size] = prg
+        ## Problems inputs
+        self.inputs = inputs
+        ## Function to test size
+        self.is_size_allowed = allowed_size
+        if self.code is not None:
+            for size in self.sizes:
+                if self.is_size_allowed is None or self.is_size_allowed(size):
+                    prg = cl.Program(self.ctx, self.code)
+                    prg.build(" -cl-single-precision-constant -cl-opt-disable " + build_opt(size))
+                    self.prg[size] = prg
         ## Function to setup kernels arguments
         self.kernelSetup = None
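
The build loop above now compiles one program per admissible size only, so a kernel that supports, say, only multiples of 32 is never built for other sizes and gets no timings for them. A standalone sketch of that guarded path (kernel source and predicate are hypothetical):

    scale_src = """
    __kernel void scale(__global float* a)
    {
        a[get_global_id(0)] *= 2.0f;
    }
    """
    # No program is built for (100,): it fails the allowed_size predicate.
    b = Benchmark(scale_src, 'scale', [(64,), (100,), (128,)],
                  lambda s: "-D WIDTH=" + str(s[0]),
                  allowed_size=lambda s: s[0] % 32 == 0)
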
@@ -81,30 +263,38 @@ class Benchmark:
         """
         print "Testing : "
         for size in self.sizes:
-            kernel = eval('self.prg[size].' + self.kernel)
-            kernelArgs = self.kernelSetup(size)
-            res = np.ones(size, dtype=np.float32, order='F')
-            for i in xrange(len(kernelArgs)):
-                if isinstance(kernelArgs[i], np.ndarray):
-                    buff = cl.Buffer(self.ctx, cl.mem_flags.READ_WRITE, size=kernelArgs[i].nbytes)
-                    cl.enqueue_copy(self.queue, buff, kernelArgs[i])
-                    kernelArgs[i] = buff
-            self.queue.finish()
-            print kernelArgs[0:2]
-            kernel(self.queue, *tuple(kernelArgs))
-            self.queue.finish()
-            cl.enqueue_copy(self.queue, res, kernelArgs[ind_res])
-            print size
-            try:
-                np.testing.assert_array_almost_equal(res, true_res[size], decimal=6)
-                print 'Ok'
-            except AssertionError as ae:
-                print res[np.where(np.abs(res - true_res[size]) >= 1e-6)].shape, "bad elements ~ 1e-6"
-                print res[np.where(np.abs(res - true_res[size]) >= 1e-5)].shape, "bad elements ~ 1e-5"
-                print res[np.where(np.abs(res - true_res[size]) >= 1e-4)].shape, "bad elements ~ 1e-4"
-                print res[np.where(np.abs(res - true_res[size]) >= 1e-3)].shape, "bad elements ~ 1e-3"
-                if res[np.where(np.abs(res - true_res[size]) >= 1e-3)].shape[0] > 0:
-                    raise ae
+            if self.is_size_allowed is None or self.is_size_allowed(size):
+                kernel = getattr(self.prg[size], self.kernel)
+                kernelArgs = self.kernelSetup(size, self.inputs)
+                res = np.empty_like(kernelArgs[ind_res])
+                for i in xrange(len(kernelArgs)):
+                    if isinstance(kernelArgs[i], np.ndarray) and len(kernelArgs[i].shape) > 1:
+                        buff = cl.Buffer(self.ctx, cl.mem_flags.READ_WRITE, size=kernelArgs[i].nbytes)
+                        cl.enqueue_copy(self.queue, buff, kernelArgs[i])
+                        kernelArgs[i] = buff
+                self.queue.finish()
+                print kernelArgs[0:2]
+                kernel(self.queue, *tuple(kernelArgs))
+                self.queue.finish()
+                cl.enqueue_copy(self.queue, res, kernelArgs[ind_res])
+                self.queue.finish()
+                print size
+                try:
+                    if len(res.shape) == 3:
+                        res = res[:size[0], :size[1], :size[2]]
+                    else:
+                        res = res[:size[0], :size[1]]
+                    np.testing.assert_array_almost_equal(res, true_res[size], decimal=6)
+                    print 'Ok'
+                except AssertionError as ae:
+                    print res[np.where(np.abs(res - true_res[size]) >= 1e-6)].shape, "bad elements ~ 1e-6"
+                    print res[np.where(np.abs(res - true_res[size]) >= 1e-5)].shape, "bad elements ~ 1e-5"
+                    print res[np.where(np.abs(res - true_res[size]) >= 1e-4)].shape, "bad elements ~ 1e-4"
+                    print res[np.where(np.abs(res - true_res[size]) >= 1e-3)].shape, "bad elements ~ 1e-3"
+                    if res[np.where(np.abs(res - true_res[size]) >= 1e-3)].shape[0] > 0:
+                        print res[np.where(np.abs(res - true_res[size]) >= 1e-3)]
+                        print true_res[size][np.where(np.abs(res - true_res[size]) >= 1e-3)]
+                        raise ae
 
     def launch(self):
         """
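
In test mode the suite first calls compute_true_res(sizes, true_res, inputs) to fill a dict keyed by size, then each Benchmark slices the device result back to the problem size (arrays may be padded) and compares it to true_res[size]. A hypothetical reference function matching the copy example sketched after the BenchmarkSuite class, with arg_to_test=3 as in that sketch:

    def true_copy(sizes, true_res, inputs):
        # Reference result per size: a plain host-side copy.
        for size in sizes:
            true_res[size] = inputs['field'][:size[0], :size[1]].copy()

    suite = BenchmarkSuite(sizes, 'copy', versions, configs,
                           test=True, true_res=true_copy, arg_to_test=3,
                           inputs={'field': np.ones((512, 512), np.float32)})
    suite.launch()  # runs Benchmark.test() instead of timing
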
@@ -114,58 +304,29 @@ class Benchmark:
         """
         print "\nRunning : "
         for size in self.sizes:
-            print size,
-            kernel = eval('self.prg[size].' + self.kernel)
-            if not self.kernelSetup is None:
-                kernelArgs = self.kernelSetup(size)
-                for i in xrange(len(kernelArgs)):
-                    if isinstance(kernelArgs[i], np.ndarray):
-                        buff = cl.Buffer(self.ctx, cl.mem_flags.READ_WRITE, size=kernelArgs[i].nbytes)
-                        cl.enqueue_copy(self.queue, buff, kernelArgs[i])
-                        kernelArgs[i] = buff
-                self.queue.finish()
-                evt = kernel(self.queue, *tuple(kernelArgs))
-                self.queue.finish()
-                evts = []
-                for i in xrange(self.nb_run):
+            if self.is_size_allowed is None or self.is_size_allowed(size):
+                print size,
+                kernel = getattr(self.prg[size], self.kernel)
+                if self.kernelSetup is not None:
+                    kernelArgs = self.kernelSetup(size, self.inputs)
+                    for i in xrange(len(kernelArgs)):
+                        if isinstance(kernelArgs[i], np.ndarray) and len(kernelArgs[i].shape) > 1:
+                            buff = cl.Buffer(self.ctx, cl.mem_flags.READ_WRITE, size=kernelArgs[i].nbytes)
+                            cl.enqueue_copy(self.queue, buff, kernelArgs[i])
+                            kernelArgs[i] = buff
+                    self.queue.finish()
+                    print kernelArgs[0:2]
                     evt = kernel(self.queue, *tuple(kernelArgs))
-                    evts.append(evt)
                     self.queue.finish()
-                time = 0.
-                for evt in evts:
-                    time += (evt.profile.end - evt.profile.start) * 1e-9
-                self.timings[size] = time / self.nb_run
-                self.kernel_args[size] = kernelArgs[0:2]
-                print self.timings[size], "args : ", kernelArgs[0:2]
-
-    def show_results(self):
-        """Display timings in a plot."""
-        pl.figure(1)
-        pl.plot([size[0] for size in self.sizes],
-                [self.timings[size] / (size[0] ** len(size)) for size in self.sizes],
-                linewidth=1.0)
-        pl.xlabel('problem sizes')
-        pl.ylabel('time per particle')
-        pl.grid(True)
-        signal.signal(signal.SIGINT, signal.SIG_DFL)
-        pl.show()
-
-    def toFile(self, filename, header=None):
-        """
-        Save timings into a file.
+                    evts = []
+                    for i in xrange(self.nb_run):
+                        evt = kernel(self.queue, *tuple(kernelArgs))
+                        evts.append(evt)
+                    self.queue.finish()
+                    time = 0.
+                    for evt in evts:
+                        time += (evt.profile.end - evt.profile.start) * 1e-9
+                    self.timings[size] = time / self.nb_run
+                    self.kernel_args[size] = kernelArgs[0:2]
+                    print self.timings[size], "args : ", kernelArgs[0:2]
-
-        @param filename : file name
-        @param header : Optional first line of file
-        """
-        f = open(filename, 'w')
-        if header is not None:
-            f.write(header + "\n")
-        f.write("#size dim nPart time kerel_wg \n")
-        for size in self.sizes:
-            f.write("{0}\t {1}\t {2}\t {3}\t".format(size[0],
-                                                     len(size),
-                                                     size[0] ** len(size),
-                                                     self.timings[size]))
-            f.write(str(self.kernel_args[size]))
-            f.write("\n")
-        f.close()
--
GitLab
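
Since launch() pickles the whole timings dictionary, a later session can inspect or re-plot the measurements without rerunning any kernel. A minimal sketch, assuming the default file_name parameter:

    import pickle

    timings = pickle.load(open('Benchmarks_data.pickle', 'r'))
    # timings[kernel][version][config] maps a size tuple to the mean
    # kernel execution time in seconds.
    for kernel in sorted(timings):
        for version in sorted(timings[kernel]):
            for config, per_size in sorted(timings[kernel][version].items()):
                for size, t in sorted(per_size.items()):
                    print kernel, version, config, size, t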