Restored VCL header generation functionality

2014-10-05 05:16:21 +02:00
parent fc8b450a7c
commit acb7fe73e8
9 changed files with 189 additions and 185 deletions
--- a/atidlas/execute.hpp
+++ b/atidlas/execute.hpp
@@ -8,6 +8,7 @@
 #include "viennacl/tools/tools.hpp"
 #include "viennacl/tools/timer.hpp"
 #include "viennacl/scheduler/forwards.h"
+#include "viennacl/scheduler/io.hpp"

 #include "atidlas/forwards.h"
 #include "atidlas/templates/template_base.hpp"
@@ -25,6 +26,7 @@ namespace atidlas

 inline void execute(template_base const & T, statements_container const & statements, viennacl::ocl::context & ctx = viennacl::ocl::current_context(), bool force_compilation = false)
 {
+  //std::cout << statements.data().front() << std::endl;
  //Generate program name
  std::string program_name = tools::statements_representation(statements, BIND_TO_HANDLE);
  execution_handler handler(program_name, ctx, ctx.current_device(), force_compilation);
--- a/atidlas/templates/reduction.hpp
+++ b/atidlas/templates/reduction.hpp
@@ -148,7 +148,7 @@ private:
          str[0] = "#namereg";
        else
          for (unsigned int a = 0; a < simd_width; ++a)
-            str[a] = "#namereg.s" + tools::to_string(a);
+            str[a] = append_simd_suffix("#namereg.s", a);

        for (unsigned int k = 0; k < exprs.size(); ++k)
        {
--- a/atidlas/templates/row_wise_reduction.hpp
+++ b/atidlas/templates/row_wise_reduction.hpp
@@ -110,7 +110,7 @@ private:
          str[0] = "#namereg";
        else
          for (unsigned int a = 0; a < simd_width; ++a)
-            str[a] = "#namereg.s" + to_string(a);
+            str[a] = append_simd_suffix("#namereg.s",a);


        for (unsigned int k = 0; k < exprs.size(); ++k)
--- a/atidlas/templates/template_base.hpp
+++ b/atidlas/templates/template_base.hpp
@@ -383,6 +383,14 @@ protected:

 protected:

+  static std::string append_simd_suffix(std::string const & str, unsigned int i)
+  {
+    assert(i < 16);
+    static char suffixes[] = {'0','1','2','3','4','5','6','7','8','9',
+                             'a','b','c','d','e','f'};
+    return str + tools::to_string(suffixes[i]);
+  }
+  
  static bool is_offset_modifier(viennacl::scheduler::statement_node const & node)
  {
    return node.op.type==viennacl::scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE
--- a/autotune/config.ini
+++ b/autotune/config.ini
@@ -1,24 +1,23 @@
-#will save the archive into /tmp/name-of-operation.dat
-tmp-folder = /tmp/
+viennacl-src-root = /home/philippe/Development/viennacl-dev/viennacl/

 [vector-axpy]
 devices = 0
-precision = single
-#~ size = 10000000
-#~ 
-#~ [matrix-axpy]
-#~ devices = 0
-#~ precision = single
-#~ size = 3072, 3072
-#~ 
-#~ [row-wise-reduction]
-#~ devices = 0
-#~ precision = single
-#~ layout = N, T
-#~ size = 3968, 3968
+precision = single, double
+size = 5000000
+
+[matrix-axpy]
+devices = 0
+precision = single, double
+size = 2560, 2560
+
+[row-wise-reduction]
+devices = 0
+precision = single, double
+layout = N,T
+size = 2560, 2560

 [matrix-product]
 devices = 0
 precision = single
-layout = NT
-#size = 1536, 1536, 1536
+layout = NN,NT,TN,TT
+size = 1536, 1536, 1536
--- a/autotune/python/autotune.py
+++ b/autotune/python/autotune.py
@@ -54,6 +54,8 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
            confdevices = p['devices']
            devices = utils.DEVICES_PRESETS[confdevices] if confdevices in utils.DEVICES_PRESETS else [utils.all_devices[int(i)] for i in confdevices]
            precisions =  map_to_list(str, p['precision'])
+            if 'all' in precisions:
+                precisions = ['single','double']
            datatypes = [DATATYPES[k] for k in precisions]
            #Iterate through the datatypes and the devices
            for datatype, device in itertools.product(datatypes, devices):
@@ -64,20 +66,23 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
                if datatype is vcl.float64 and not device.double_fp_config:
                    sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
                    continue
-                #Helper
-                def execute(device, statement, other_params, sizes, fname = os.devnull, parameters = None):
+                #Helper for execution
+                def execute(device, node, other_params, sizes, fname = os.devnull, parameters = None):
                    if parameters:
                        TemplateType = TYPES[operation]['template']
                        return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters),*other_params), statement, device)
                    print('-----')
                    print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')] for sizes', sizes))))
                    with open(fname, "w+") as archive:
-                        return optimize.genetic(statement, device, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
+                        with vcl.Statement(node) as statement:
+                            return optimize.genetic(statement, device, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
                                                lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
-                #Helper
-                def tune(execution_handler, nTuning, nDataPoints, draw):
+                #Helper for tuning
+                def tune(execution_handler, nTuning, nDataPoints, draw, additional_parameters):
                    if 'size' in p:
                        profile = execution_handler(map_to_list(int, p['size']))
+                        if 'viennacl-src-root' in config:
+                            tools.update_viennacl_headers(config['viennacl-src-root'],device,datatype,operation,additional_parameters,profile)
                    else:
                        def compute_perf(x, t):
                            return TYPES[operation]['perf-index']([datatype().itemsize, x, t])
@@ -89,15 +94,17 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
                    def execution_handler(sizes, fname=os.devnull, parameters=None):
                        x = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
                        y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
-                        return execute(device, vcl.Statement(vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y))), (), sizes, fname, parameters)
-                    tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=10, high=100000, size=1))
+                        z = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
+                        return execute(device, vcl.Assign(z, vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y))), (), sizes, fname, parameters)
+                    tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=10, high=100000, size=1), ())
                #Matrix AXPY
                if operation=='matrix-axpy':
                    def execution_handler(sizes, fname=os.devnull, parameters=None):
                        A = vcl.Matrix(sizes, context=ctx, dtype=datatype)
                        B = vcl.Matrix(sizes, context=ctx, dtype=datatype)
-                        return execute(device, vcl.Statement(A+B), (), sizes, fname, parameters)
-                    tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=5, high=100, size=2))
+                        C = vcl.Matrix(sizes, context=ctx, dtype=datatype)
+                        return execute(device, vcl.Assign(C,A+B), (), sizes, fname, parameters)
+                    tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=5, high=100, size=2), ())
                #Row-wise reduction
                if operation=='row-wise-reduction':
                    layouts = map_to_list(str,p['layout'])
@@ -107,9 +114,10 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
                        def execution_handler(sizes, fname=os.devnull, parameters=None):
                            A = vcl.Matrix(sizes if A_trans=='N' else sizes[::-1], context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
                            x = vcl.Vector(sizes[1] if A_trans=='N' else sizes[0], context=ctx, dtype=datatype)
+                            y = vcl.Vector(sizes[0] if A_trans=='N' else sizes[1], context=ctx, dtype=datatype)
                            LHS = A if A_trans=='N' else A.T
-                            execute(device, vcl.Statement(LHS*x), (), sizes, fname, parameters)
-                        tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=5, high=100, size=2))
+                            return execute(device, vcl.Assign(y, LHS*x), (), sizes, fname, parameters)
+                        tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=5, high=100, size=2), (A_trans,))
                #Matrix Product
                if operation=='matrix-product':
                    layouts = map_to_list(str,p['layout'])
@@ -126,9 +134,8 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
                            alpha = vcl.HostScalar(1.0,  context=ctx, dtype = datatype)
                            beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
                            C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
-                            statement = vcl.Statement(vcl.Assign(C,LHS*RHS*alpha + C*beta))
-                            return execute(device, statement,(A_trans, B_trans), sizes, fname, parameters)
-                        tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=1, high=40, size=3))
+                            return execute(device, vcl.Assign(C,LHS*RHS*alpha + C*beta),(A_trans, B_trans), sizes, fname, parameters)
+                        tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=1, high=40, size=3),(layout[0], layout[1]))



--- a/autotune/python/tools.py
+++ b/autotune/python/tools.py
@@ -1,6 +1,11 @@
 from __future__ import division
 import pyopencl
 import time
+import os
+import sys
+
+import pyopencl as cl
+import pyviennacl as vcl
 from pyviennacl.atidlas import StatementsTuple

 class PhysicalLimitsNV:
@@ -158,7 +163,6 @@ def benchmark(template, statement, device):
    if occupancy_record.occupancy < 15 :
        raise ValueError("Template has too low occupancy")
    else:
-        #~ try:
        template.execute(statement, True)
        statement.result.context.finish_all_queues()
        N = 0
@@ -170,5 +174,138 @@ def benchmark(template, statement, device):
            current_time += time.time() - time_before
            N+=1
        return current_time/N
-        #~ except:
-            #~ raise ValueError("Invalid template")
+
+
+def update_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters):
+    
+    def sanitize_string(string, keep_chars = ['_']):
+        string = string.replace(' ', '_').replace('-', '_').lower()
+        string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
+        return string
+    
+    def append_include(data, path):
+        include_name = '#include "' + path +'"\n'
+        already_included = data.find(include_name)
+        if already_included == -1:
+            insert_index = data.index('\n', data.index('#define')) + 1
+            return data[:insert_index] + '\n' + include_name + data[insert_index:]
+        return data
+
+
+    builtin_database_dir = os.path.join(viennacl_root, "device_specific", "builtin_database")
+    if not os.path.isdir(builtin_database_dir):
+        raise EnvironmentError('ViennaCL root path is incorrect. Cannot access ' + builtin_database_dir + '!\n'
+                                'Your version of ViennaCL may be too old and/or corrupted.')
+
+    function_name_dict = { vcl.float32: 'add_4B',
+                           vcl.float64: 'add_8B' }
+
+    additional_parameters_dict = {'N':  "char_to_type<'N'>",
+                                  'T':  "char_to_type<'T'>"}
+
+    #Create the device-specific headers
+    cpp_device_name = sanitize_string(device.name)
+    function_name = function_name_dict[datatype]
+    operation = operation.replace('-','_')
+
+    cpp_class_name = operation + '_template'
+    header_name = cpp_device_name + ".hpp"
+    function_declaration = 'inline void ' + function_name + '(' + ', '.join(['database_type<' + cpp_class_name + '::parameters_type> & db'] + \
+                                                                          [additional_parameters_dict[x] for x in additional_parameters]) + ')'
+
+
+    device_type_prefix = {  
+                            cl.device_type.GPU: 'gpu',
+                            cl.device_type.CPU: 'cpu',
+                            cl.device_type.ACCELERATOR: 'accelerator'
+                         }[device.type]
+    vendor_prefix = {   
+                        vcl.opencl.VendorId.beignet_id: 'beignet',
+                        vcl.opencl.VendorId.nvidia_id: 'nvidia',
+                        vcl.opencl.VendorId.amd_id: 'amd',
+                        vcl.opencl.VendorId.intel_id: 'intel'
+                    }[device.vendor_id]
+    architecture_family = vcl.opencl.architecture_family(device.vendor_id, device.name)
+
+    header_hierarchy = ["devices", device_type_prefix, vendor_prefix, architecture_family]
+    header_directory = os.path.join(builtin_database_dir, *header_hierarchy)
+    header_path = os.path.join(header_directory, header_name)
+
+    if not os.path.exists(header_directory):
+        os.makedirs(header_directory)
+
+    if os.path.exists(header_path):
+        with open (header_path, "r") as myfile:
+            data=myfile.read()
+    else:
+        data = ''
+
+    if not data:
+        ifndef_suffix = ('_'.join(header_hierarchy) + '_hpp_').upper()
+        data =  ('#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
+            '#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
+            '\n'
+            '#include "viennacl/device_specific/forwards.h"\n'
+            '#include "viennacl/device_specific/builtin_database/common.hpp"\n'
+            '\n'
+            'namespace viennacl{\n'
+            'namespace device_specific{\n'
+            'namespace builtin_database{\n'
+            'namespace devices{\n'
+            'namespace '  + device_type_prefix + '{\n'
+            'namespace '  + vendor_prefix + '{\n'
+            'namespace '  + architecture_family + '{\n'
+            'namespace '  + cpp_device_name + '{\n'
+            '\n'
+            '}\n'
+            '}\n'
+            '}\n'
+            '}\n'
+            '}\n'
+            '}\n'
+            '}\n'
+            '}\n'
+            '#endif\n'
+            '')
+
+    data = append_include(data, 'viennacl/device_specific/templates/' + cpp_class_name + '.hpp')
+    device_type = { 
+                    cl.device_type.GPU: 'CL_DEVICE_TYPE_GPU',
+                    cl.device_type.CPU: 'CL_DEVICE_TYPE_CPU',
+                    cl.device_type.ACCELERATOR: 'CL_DEVICE_TYPE_ACCELERATOR'
+                  }[device.type]
+    add_to_database_arguments = [vendor_prefix + '_id', device_type, 'ocl::'+architecture_family,
+                  '"' + device.name + '"',  cpp_class_name + '::parameters_type(' + ','.join(map(str,parameters)) + ')']
+    core = '  db.' + function_name + '(' + ', '.join(add_to_database_arguments) + ');'
+
+    already_declared = data.find(function_declaration)
+    if already_declared==-1:
+        substr = 'namespace '  + cpp_device_name + '{\n'
+        insert_index = data.index(substr) + len(substr)
+        data = data[:insert_index] + '\n' + function_declaration + '\n{\n' + core + '\n}\n' + data[insert_index:]
+    else:
+        i1 = data.find('{', already_declared)
+        if data[i1-1]=='\n':
+            i1 = i1 - 1
+        i2 = data.find('}', already_declared) + 1
+        data = data[:i1]  + '\n{\n' + core + '\n}' + data[i2:]
+
+    #Write the header file
+    with open(header_path, "w+") as myfile:
+        myfile.write(data)
+
+    #Updates the global ViennaCL headers
+    with open(os.path.join(builtin_database_dir, operation + '.hpp'), 'r+') as operation_header:
+        data = operation_header.read()
+        data = append_include(data, os.path.relpath(header_path, os.path.join(viennacl_root, os.pardir)))
+
+        scope_name = '_'.join(('init', operation) + additional_parameters)
+        scope = data.index(scope_name)
+        function_call = '  ' + '::'.join(header_hierarchy + [cpp_device_name, function_name]) + '(' + ', '.join(['result'] + [additional_parameters_dict[k] + '()' for k in additional_parameters]) + ')'
+        if function_call not in data:
+            insert_index = data.rindex('\n', 0, data.index('return result', scope))
+            data = data[:insert_index] + function_call + ';\n' + data[insert_index:]
+
+        operation_header.seek(0)
+        operation_header.truncate()
+        operation_header.write(data)
--- a/autotune/python/utils.py
+++ b/autotune/python/utils.py
@@ -1,33 +0,0 @@
-import pyopencl as cl
-import pyviennacl as vcl
-
-all_devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
-
-DEVICE_TYPE_PREFIX = {  cl.device_type.GPU: 'gpu',
-                        cl.device_type.CPU: 'cpu',
-                        cl.device_type.ACCELERATOR: 'accelerator'
-}
-
-DEVICE_TYPE_CL_NAME = { cl.device_type.GPU: 'CL_DEVICE_TYPE_GPU',
-                        cl.device_type.CPU: 'CL_DEVICE_TYPE_CPU',
-                        cl.device_type.ACCELERATOR: 'CL_DEVICE_TYPE_ACCELERATOR'
-}
-
-VENDOR_PREFIX = {       vcl.opencl.VendorId.beignet_id: 'beignet',
-                        vcl.opencl.VendorId.nvidia_id: 'nvidia',
-                        vcl.opencl.VendorId.amd_id: 'amd',
-                        vcl.opencl.VendorId.intel_id: 'intel'
-}
-
-DEVICES_PRESETS = {'all': all_devices,
-                   'gpus': [d for d in all_devices if d.type==cl.device_type.GPU],
-                   'cpus': [d for d in all_devices if d.type==cl.device_type.CPU],
-                   'accelerators': [d for d in all_devices if d.type==cl.device_type.ACCELERATOR]
-}
-
-
-
-def sanitize_string(string, keep_chars = ['_']):
-    string = string.replace(' ', '_').lower()
-    string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
-    return string
--- a/autotune/python/vclio.py
+++ b/autotune/python/vclio.py
@@ -1,116 +0,0 @@
-import sys
-import os
-import utils
-
-def append_include(data, path):
-    include_name = '#include "' + path +'"\n'
-    already_included = data.find(include_name)
-    if already_included == -1:
-        insert_index = data.index('\n', data.index('#define')) + 1
-        return data[:insert_index] + '\n' + include_name + data[insert_index:]
-    return data
-
-def generate_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters):
-    builtin_database_dir = os.path.join(viennacl_root, "device_specific", "builtin_database")
-    if not os.path.isdir(builtin_database_dir):
-        raise EnvironmentError('ViennaCL root path is incorrect. Cannot access ' + builtin_database_dir + '!\n'
-                                'Your version of ViennaCL may be too old and/or corrupted.')
-
-    function_name_dict = { vcl.float32: 'add_4B',
-                           vcl.float64: 'add_8B' }
-
-    additional_parameters_dict = {'N':  "char_to_type<'N'>",
-                                  'T':  "char_to_type<'T'>"}
-
-    #Create the device-specific headers
-    cpp_device_name = utils.sanitize_string(device.name)
-    function_name = function_name_dict[datatype]
-    operation = operation.replace('-','_')
-
-    cpp_class_name = operation + '_template'
-    header_name = cpp_device_name + ".hpp"
-    function_declaration = 'inline void ' + function_name + '(' + ', '.join(['database_type<' + cpp_class_name + '::parameters_type> & db'] + \
-                                                                          [additional_parameters_dict[x] for x in additional_parameters]) + ')'
-
-    device_type_prefix = utils.DEVICE_TYPE_PREFIX[device.type]
-    vendor_prefix = utils.VENDOR_PREFIX[device.vendor_id]
-    architecture_family = vcl.opencl.architecture_family(device.vendor_id, device.name)
-
-    header_hierarchy = ["devices", device_type_prefix, vendor_prefix, architecture_family]
-    header_directory = os.path.join(builtin_database_dir, *header_hierarchy)
-    header_path = os.path.join(header_directory, header_name)
-
-    if not os.path.exists(header_directory):
-        os.makedirs(header_directory)
-
-    if os.path.exists(header_path):
-        with open (header_path, "r") as myfile:
-            data=myfile.read()
-    else:
-        data = ''
-
-    if not data:
-        ifndef_suffix = ('_'.join(header_hierarchy) + '_hpp_').upper()
-        data =  ('#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
-            '#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
-            '\n'
-            '#include "viennacl/device_specific/forwards.h"\n'
-            '#include "viennacl/device_specific/builtin_database/common.hpp"\n'
-            '\n'
-            'namespace viennacl{\n'
-            'namespace device_specific{\n'
-            'namespace builtin_database{\n'
-            'namespace devices{\n'
-            'namespace '  + device_type_prefix + '{\n'
-            'namespace '  + vendor_prefix + '{\n'
-            'namespace '  + architecture_family + '{\n'
-            'namespace '  + cpp_device_name + '{\n'
-            '\n'
-            '}\n'
-            '}\n'
-            '}\n'
-            '}\n'
-            '}\n'
-            '}\n'
-            '}\n'
-            '}\n'
-            '#endif\n'
-            '')
-
-    data = append_include(data, 'viennacl/device_specific/templates/' + cpp_class_name + '.hpp')
-
-    add_to_database_arguments = [vendor_prefix + '_id', utils.DEVICE_TYPE_CL_NAME[device.type], 'ocl::'+architecture_family,
-                  '"' + device.name + '"',  cpp_class_name + '::parameters' + str(parameters)]
-    core = '  db.' + function_name + '(' + ', '.join(add_to_database_arguments) + ');'
-
-    already_declared = data.find(function_declaration)
-    if already_declared==-1:
-        substr = 'namespace '  + cpp_device_name + '{\n'
-        insert_index = data.index(substr) + len(substr)
-        data = data[:insert_index] + '\n' + function_declaration + '\n{\n' + core + '\n}\n' + data[insert_index:]
-    else:
-        i1 = data.find('{', already_declared)
-        if data[i1-1]=='\n':
-            i1 = i1 - 1
-        i2 = data.find('}', already_declared) + 1
-        data = data[:i1]  + '\n{\n' + core + '\n}' + data[i2:]
-
-    #Write the header file
-    with open(header_path, "w+") as myfile:
-        myfile.write(data)
-
-    #Updates the global ViennaCL headers
-    with open(os.path.join(builtin_database_dir, operation + '.hpp'), 'r+') as operation_header:
-        data = operation_header.read()
-        data = append_include(data, os.path.relpath(header_path, os.path.join(viennacl_root, os.pardir)))
-
-        scope_name = '_'.join(('init', operation) + additional_parameters)
-        scope = data.index(scope_name)
-        function_call = '  ' + '::'.join(header_hierarchy + [cpp_device_name, function_name]) + '(' + ', '.join(['result'] + [additional_parameters_dict[k] + '()' for k in additional_parameters]) + ')'
-        if function_call not in data:
-            insert_index = data.rindex('\n', 0, data.index('return result', scope))
-            data = data[:insert_index] + function_call + ';\n' + data[insert_index:]
-
-        operation_header.seek(0)
-        operation_header.truncate()
-        operation_header.write(data)