From 799eba8d55a2cf10e11230af0be92d334a53e623 Mon Sep 17 00:00:00 2001 From: Jean-Matthieu Etancelin <jean-matthieu.etancelin@imag.fr> Date: Wed, 16 May 2012 13:42:31 +0000 Subject: [PATCH] Add benchmark copy with strides. --- .../kernel_benchmark/compute_stride_copy.py | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 parmepy/kernel_benchmark/compute_stride_copy.py diff --git a/parmepy/kernel_benchmark/compute_stride_copy.py b/parmepy/kernel_benchmark/compute_stride_copy.py new file mode 100644 index 000000000..38fd1ec35 --- /dev/null +++ b/parmepy/kernel_benchmark/compute_stride_copy.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +import pyopencl as cl +import numpy as np +import pylab as pl +import signal + + #Get platform. +platform = cl.get_platforms()[0] +#Get device. +device = platform.get_devices(cl.device_type.GPU)[0] +print "Running on", device.name, "of", platform.name, "platform." +#Creates GPU Context +ctx = cl.Context([device]) +#Create CommandQueue on the GPU Context +queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) + +nb_runs = 100 +nb = 3 * 256 ** 3 +nb_strides = 33 +strides = np.zeros(nb_strides - 1) + +input_tab = np.ones(nb).astype(np.float32) +output_tab = np.zeros(nb).astype(np.float32) + +input_buffer = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=input_tab) +output_buffer = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=output_tab) + +p = (input_buffer.size + output_buffer.size) * 100 / (device.global_mem_size * 1.) +print "Total memory allocated on gpu :", (input_buffer.size + output_buffer.size) * 1e-6, "MB ", +print "(", p, " % of total available memory)" + +kernel_src = """ +__kernel void strideCopy(__global float* idata, + __global float* odata, + int stride + ) +{ + int xid = get_global_id(0) * stride; + odata[xid] = idata[xid]; +} + +""" + +prg = cl.Program(ctx, kernel_src).build() + +f = open("./strides.dat", 'w') +for s in xrange(1, nb_strides): + t = 0. + for r in xrange(nb_runs): + evt = prg.strideCopy(queue, + (nb / 32,), + None, + input_buffer, + output_buffer, + np.uint32(s)) + queue.finish() + t += (evt.profile.end - evt.profile.start) * 1e-9 + strides[s - 1] = (((2 * 4 * (nb / 32))) * 1e-9) / (t / (nb_runs * 1.0)) + print 'Stride : ', s, ' -> ', strides[s - 1], 'GBytes/s' + f.write(str(s) + "\t" + str(strides[s - 1]) + "\n") +f.close() -- GitLab