Benchmark tool enables double precision for OpenCL 1.1 and 1.2

689ad021 · Jean-Matthieu Etancelin · 8ec6215e · 689ad021
Commit 689ad021 authored 12 years ago by Jean-Matthieu Etancelin
--- a/HySoP/hysop/tools/kernel_benchmark.py
+++ b/HySoP/hysop/tools/kernel_benchmark.py
@@ -108,9 +108,8 @@ class BenchmarkSuite:
                    b.launch()
                    [self.timings[self.kernel_name][v[1]][conf[2]].__setitem__(
                            t[0], t[1]) for t in b.timings.items()]
-        if not self.test:
-            pickle.dump(self.timings, open(self.pickle_file_name + '.pickle', 'w'), 0)
-            self.write_file()
+                    pickle.dump(self.timings, open(self.pickle_file_name + '.pickle', 'w'), 0)
+                    self.write_file()

    def write_file(self):
        """
@@ -232,26 +231,37 @@ class Benchmark:
        print " over : ", self.device.name,
        print " with ", self.device.opencl_c_version
        print " Precision capability  ",
+        self.correctlyroundeddividesqrt = True
        if precision == 'SP':
            print "for Single Precision: ",
            for v in ['DENORM', 'INF_NAN',
                      'ROUND_TO_NEAREST', 'ROUND_TO_ZERO', 'ROUND_TO_INF',
                      'FMA', 'CORRECTLY_ROUNDED_DIVIDE_SQRT', 'SOFT_FLOAT']:
-                if eval('(self.device.single_fp_config & cl.device_fp_config.' +
-                        v + ') == cl.device_fp_config.' + v):
-                    print v,
+                try:
+                    if eval('(self.device.single_fp_config & cl.device_fp_config.' +
+                            v + ') == cl.device_fp_config.' + v):
+                        print v,
+                except AttributeError as ae:
+                    if v is 'CORRECTLY_ROUNDED_DIVIDE_SQRT':
+                        self.correctlyroundeddividesqrt = False
+                    print '\n', v, 'is not supported in OpenCL C 1.2. Exception catched : ', ae
        else:
            print "for Double Precision: ",
            if self.device.double_fp_config > 0:
                for v in ['DENORM', 'INF_NAN',
                          'ROUND_TO_NEAREST', 'ROUND_TO_ZERO', 'ROUND_TO_INF',
                          'FMA', 'CORRECTLY_ROUNDED_DIVIDE_SQRT', 'SOFT_FLOAT']:
-                    if eval('(self.device.double_fp_config & cl.device_fp_config.' +
-                            v + ') == cl.device_fp_config.' + v):
-                        print v,
+                    try:
+                        if eval('(self.device.double_fp_config & cl.device_fp_config.' +
+                                v + ') == cl.device_fp_config.' + v):
+                            print v,
+                    except AttributeError as ae:
+                        if v is 'CORRECTLY_ROUNDED_DIVIDE_SQRT':
+                            self.correctlyroundeddividesqrt = False
+                        print '\n', v, 'is supported in OpenCL C 1.2. Exception catched : ', ae
            else:
                raise ValueError("Double Precision is not supported by device")
-        print "\n\n"
+        print ""
        #Creates GPU Context
        ## OpenCL context
        self.ctx = cl.Context([self.device])
@@ -280,13 +290,16 @@ class Benchmark:
        if self.code is not None:
            for size in self.sizes:
                if self.is_size_allowed is None or self.is_size_allowed(size):
-                    build_options = " -cl-opt-disable -cl-fp32-correctly-rounded-divide-sqrt "
+                    build_options = " -cl-opt-disable"
+                    if self.correctlyroundeddividesqrt:
+                        build_options += " -cl-fp32-correctly-rounded-divide-sqrt "
                    if self.precision == 'SP':
-                        prg = cl.Program(self.ctx, self.code)
+                        prg = cl.Program(self.ctx, self.code.replace('.0', '.0f'))
                        build_options += " -cl-single-precision-constant "
                    else:
                        prg = cl.Program(self.ctx, self.code.replace('float', 'double'))
                    prg.build(build_options + build_opt(size))
+                    print prg.get_build_info(self.device, cl.program_build_info.OPTIONS)
                    self.prg[size] = prg
        ## Function to setup kernels arguments
        self.kernelSetup = None