Restored VCL header generation functionality
This commit is contained in:
@@ -8,6 +8,7 @@
|
|||||||
#include "viennacl/tools/tools.hpp"
|
#include "viennacl/tools/tools.hpp"
|
||||||
#include "viennacl/tools/timer.hpp"
|
#include "viennacl/tools/timer.hpp"
|
||||||
#include "viennacl/scheduler/forwards.h"
|
#include "viennacl/scheduler/forwards.h"
|
||||||
|
#include "viennacl/scheduler/io.hpp"
|
||||||
|
|
||||||
#include "atidlas/forwards.h"
|
#include "atidlas/forwards.h"
|
||||||
#include "atidlas/templates/template_base.hpp"
|
#include "atidlas/templates/template_base.hpp"
|
||||||
@@ -25,6 +26,7 @@ namespace atidlas
|
|||||||
|
|
||||||
inline void execute(template_base const & T, statements_container const & statements, viennacl::ocl::context & ctx = viennacl::ocl::current_context(), bool force_compilation = false)
|
inline void execute(template_base const & T, statements_container const & statements, viennacl::ocl::context & ctx = viennacl::ocl::current_context(), bool force_compilation = false)
|
||||||
{
|
{
|
||||||
|
//std::cout << statements.data().front() << std::endl;
|
||||||
//Generate program name
|
//Generate program name
|
||||||
std::string program_name = tools::statements_representation(statements, BIND_TO_HANDLE);
|
std::string program_name = tools::statements_representation(statements, BIND_TO_HANDLE);
|
||||||
execution_handler handler(program_name, ctx, ctx.current_device(), force_compilation);
|
execution_handler handler(program_name, ctx, ctx.current_device(), force_compilation);
|
||||||
|
@@ -148,7 +148,7 @@ private:
|
|||||||
str[0] = "#namereg";
|
str[0] = "#namereg";
|
||||||
else
|
else
|
||||||
for (unsigned int a = 0; a < simd_width; ++a)
|
for (unsigned int a = 0; a < simd_width; ++a)
|
||||||
str[a] = "#namereg.s" + tools::to_string(a);
|
str[a] = append_simd_suffix("#namereg.s", a);
|
||||||
|
|
||||||
for (unsigned int k = 0; k < exprs.size(); ++k)
|
for (unsigned int k = 0; k < exprs.size(); ++k)
|
||||||
{
|
{
|
||||||
|
@@ -110,7 +110,7 @@ private:
|
|||||||
str[0] = "#namereg";
|
str[0] = "#namereg";
|
||||||
else
|
else
|
||||||
for (unsigned int a = 0; a < simd_width; ++a)
|
for (unsigned int a = 0; a < simd_width; ++a)
|
||||||
str[a] = "#namereg.s" + to_string(a);
|
str[a] = append_simd_suffix("#namereg.s",a);
|
||||||
|
|
||||||
|
|
||||||
for (unsigned int k = 0; k < exprs.size(); ++k)
|
for (unsigned int k = 0; k < exprs.size(); ++k)
|
||||||
|
@@ -383,6 +383,14 @@ protected:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
|
static std::string append_simd_suffix(std::string const & str, unsigned int i)
|
||||||
|
{
|
||||||
|
assert(i < 16);
|
||||||
|
static char suffixes[] = {'0','1','2','3','4','5','6','7','8','9',
|
||||||
|
'a','b','c','d','e','f'};
|
||||||
|
return str + tools::to_string(suffixes[i]);
|
||||||
|
}
|
||||||
|
|
||||||
static bool is_offset_modifier(viennacl::scheduler::statement_node const & node)
|
static bool is_offset_modifier(viennacl::scheduler::statement_node const & node)
|
||||||
{
|
{
|
||||||
return node.op.type==viennacl::scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE
|
return node.op.type==viennacl::scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE
|
||||||
|
@@ -1,24 +1,23 @@
|
|||||||
#will save the archive into /tmp/name-of-operation.dat
|
viennacl-src-root = /home/philippe/Development/viennacl-dev/viennacl/
|
||||||
tmp-folder = /tmp/
|
|
||||||
|
|
||||||
[vector-axpy]
|
[vector-axpy]
|
||||||
devices = 0
|
devices = 0
|
||||||
precision = single
|
precision = single, double
|
||||||
#~ size = 10000000
|
size = 5000000
|
||||||
#~
|
|
||||||
#~ [matrix-axpy]
|
[matrix-axpy]
|
||||||
#~ devices = 0
|
devices = 0
|
||||||
#~ precision = single
|
precision = single, double
|
||||||
#~ size = 3072, 3072
|
size = 2560, 2560
|
||||||
#~
|
|
||||||
#~ [row-wise-reduction]
|
[row-wise-reduction]
|
||||||
#~ devices = 0
|
devices = 0
|
||||||
#~ precision = single
|
precision = single, double
|
||||||
#~ layout = N, T
|
layout = N,T
|
||||||
#~ size = 3968, 3968
|
size = 2560, 2560
|
||||||
|
|
||||||
[matrix-product]
|
[matrix-product]
|
||||||
devices = 0
|
devices = 0
|
||||||
precision = single
|
precision = single
|
||||||
layout = NT
|
layout = NN,NT,TN,TT
|
||||||
#size = 1536, 1536, 1536
|
size = 1536, 1536, 1536
|
||||||
|
@@ -54,6 +54,8 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
|
|||||||
confdevices = p['devices']
|
confdevices = p['devices']
|
||||||
devices = utils.DEVICES_PRESETS[confdevices] if confdevices in utils.DEVICES_PRESETS else [utils.all_devices[int(i)] for i in confdevices]
|
devices = utils.DEVICES_PRESETS[confdevices] if confdevices in utils.DEVICES_PRESETS else [utils.all_devices[int(i)] for i in confdevices]
|
||||||
precisions = map_to_list(str, p['precision'])
|
precisions = map_to_list(str, p['precision'])
|
||||||
|
if 'all' in precisions:
|
||||||
|
precisions = ['single','double']
|
||||||
datatypes = [DATATYPES[k] for k in precisions]
|
datatypes = [DATATYPES[k] for k in precisions]
|
||||||
#Iterate through the datatypes and the devices
|
#Iterate through the datatypes and the devices
|
||||||
for datatype, device in itertools.product(datatypes, devices):
|
for datatype, device in itertools.product(datatypes, devices):
|
||||||
@@ -64,20 +66,23 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
|
|||||||
if datatype is vcl.float64 and not device.double_fp_config:
|
if datatype is vcl.float64 and not device.double_fp_config:
|
||||||
sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
|
sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
|
||||||
continue
|
continue
|
||||||
#Helper
|
#Helper for execution
|
||||||
def execute(device, statement, other_params, sizes, fname = os.devnull, parameters = None):
|
def execute(device, node, other_params, sizes, fname = os.devnull, parameters = None):
|
||||||
if parameters:
|
if parameters:
|
||||||
TemplateType = TYPES[operation]['template']
|
TemplateType = TYPES[operation]['template']
|
||||||
return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters),*other_params), statement, device)
|
return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters),*other_params), statement, device)
|
||||||
print('-----')
|
print('-----')
|
||||||
print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')] for sizes', sizes))))
|
print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')] for sizes', sizes))))
|
||||||
with open(fname, "w+") as archive:
|
with open(fname, "w+") as archive:
|
||||||
return optimize.genetic(statement, device, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
|
with vcl.Statement(node) as statement:
|
||||||
|
return optimize.genetic(statement, device, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
|
||||||
lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
|
lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
|
||||||
#Helper
|
#Helper for tuning
|
||||||
def tune(execution_handler, nTuning, nDataPoints, draw):
|
def tune(execution_handler, nTuning, nDataPoints, draw, additional_parameters):
|
||||||
if 'size' in p:
|
if 'size' in p:
|
||||||
profile = execution_handler(map_to_list(int, p['size']))
|
profile = execution_handler(map_to_list(int, p['size']))
|
||||||
|
if 'viennacl-src-root' in config:
|
||||||
|
tools.update_viennacl_headers(config['viennacl-src-root'],device,datatype,operation,additional_parameters,profile)
|
||||||
else:
|
else:
|
||||||
def compute_perf(x, t):
|
def compute_perf(x, t):
|
||||||
return TYPES[operation]['perf-index']([datatype().itemsize, x, t])
|
return TYPES[operation]['perf-index']([datatype().itemsize, x, t])
|
||||||
@@ -89,15 +94,17 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
|
|||||||
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||||
x = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
|
x = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
|
||||||
y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
|
y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
|
||||||
return execute(device, vcl.Statement(vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y))), (), sizes, fname, parameters)
|
z = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
|
||||||
tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=10, high=100000, size=1))
|
return execute(device, vcl.Assign(z, vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y))), (), sizes, fname, parameters)
|
||||||
|
tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=10, high=100000, size=1), ())
|
||||||
#Matrix AXPY
|
#Matrix AXPY
|
||||||
if operation=='matrix-axpy':
|
if operation=='matrix-axpy':
|
||||||
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||||
A = vcl.Matrix(sizes, context=ctx, dtype=datatype)
|
A = vcl.Matrix(sizes, context=ctx, dtype=datatype)
|
||||||
B = vcl.Matrix(sizes, context=ctx, dtype=datatype)
|
B = vcl.Matrix(sizes, context=ctx, dtype=datatype)
|
||||||
return execute(device, vcl.Statement(A+B), (), sizes, fname, parameters)
|
C = vcl.Matrix(sizes, context=ctx, dtype=datatype)
|
||||||
tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=5, high=100, size=2))
|
return execute(device, vcl.Assign(C,A+B), (), sizes, fname, parameters)
|
||||||
|
tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=5, high=100, size=2), ())
|
||||||
#Row-wise reduction
|
#Row-wise reduction
|
||||||
if operation=='row-wise-reduction':
|
if operation=='row-wise-reduction':
|
||||||
layouts = map_to_list(str,p['layout'])
|
layouts = map_to_list(str,p['layout'])
|
||||||
@@ -107,9 +114,10 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
|
|||||||
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||||
A = vcl.Matrix(sizes if A_trans=='N' else sizes[::-1], context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
|
A = vcl.Matrix(sizes if A_trans=='N' else sizes[::-1], context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
|
||||||
x = vcl.Vector(sizes[1] if A_trans=='N' else sizes[0], context=ctx, dtype=datatype)
|
x = vcl.Vector(sizes[1] if A_trans=='N' else sizes[0], context=ctx, dtype=datatype)
|
||||||
|
y = vcl.Vector(sizes[0] if A_trans=='N' else sizes[1], context=ctx, dtype=datatype)
|
||||||
LHS = A if A_trans=='N' else A.T
|
LHS = A if A_trans=='N' else A.T
|
||||||
execute(device, vcl.Statement(LHS*x), (), sizes, fname, parameters)
|
return execute(device, vcl.Assign(y, LHS*x), (), sizes, fname, parameters)
|
||||||
tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=5, high=100, size=2))
|
tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=5, high=100, size=2), (A_trans,))
|
||||||
#Matrix Product
|
#Matrix Product
|
||||||
if operation=='matrix-product':
|
if operation=='matrix-product':
|
||||||
layouts = map_to_list(str,p['layout'])
|
layouts = map_to_list(str,p['layout'])
|
||||||
@@ -126,9 +134,8 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
|
|||||||
alpha = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
|
alpha = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
|
||||||
beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
|
beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
|
||||||
C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
|
C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
|
||||||
statement = vcl.Statement(vcl.Assign(C,LHS*RHS*alpha + C*beta))
|
return execute(device, vcl.Assign(C,LHS*RHS*alpha + C*beta),(A_trans, B_trans), sizes, fname, parameters)
|
||||||
return execute(device, statement,(A_trans, B_trans), sizes, fname, parameters)
|
tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=1, high=40, size=3),(layout[0], layout[1]))
|
||||||
tune(execution_handler, 50, 10000, lambda : 64*np.random.randint(low=1, high=40, size=3))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@@ -1,6 +1,11 @@
|
|||||||
from __future__ import division
|
from __future__ import division
|
||||||
import pyopencl
|
import pyopencl
|
||||||
import time
|
import time
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import pyopencl as cl
|
||||||
|
import pyviennacl as vcl
|
||||||
from pyviennacl.atidlas import StatementsTuple
|
from pyviennacl.atidlas import StatementsTuple
|
||||||
|
|
||||||
class PhysicalLimitsNV:
|
class PhysicalLimitsNV:
|
||||||
@@ -158,7 +163,6 @@ def benchmark(template, statement, device):
|
|||||||
if occupancy_record.occupancy < 15 :
|
if occupancy_record.occupancy < 15 :
|
||||||
raise ValueError("Template has too low occupancy")
|
raise ValueError("Template has too low occupancy")
|
||||||
else:
|
else:
|
||||||
#~ try:
|
|
||||||
template.execute(statement, True)
|
template.execute(statement, True)
|
||||||
statement.result.context.finish_all_queues()
|
statement.result.context.finish_all_queues()
|
||||||
N = 0
|
N = 0
|
||||||
@@ -170,5 +174,138 @@ def benchmark(template, statement, device):
|
|||||||
current_time += time.time() - time_before
|
current_time += time.time() - time_before
|
||||||
N+=1
|
N+=1
|
||||||
return current_time/N
|
return current_time/N
|
||||||
#~ except:
|
|
||||||
#~ raise ValueError("Invalid template")
|
|
||||||
|
def update_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters):
|
||||||
|
|
||||||
|
def sanitize_string(string, keep_chars = ['_']):
|
||||||
|
string = string.replace(' ', '_').replace('-', '_').lower()
|
||||||
|
string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
|
||||||
|
return string
|
||||||
|
|
||||||
|
def append_include(data, path):
|
||||||
|
include_name = '#include "' + path +'"\n'
|
||||||
|
already_included = data.find(include_name)
|
||||||
|
if already_included == -1:
|
||||||
|
insert_index = data.index('\n', data.index('#define')) + 1
|
||||||
|
return data[:insert_index] + '\n' + include_name + data[insert_index:]
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
builtin_database_dir = os.path.join(viennacl_root, "device_specific", "builtin_database")
|
||||||
|
if not os.path.isdir(builtin_database_dir):
|
||||||
|
raise EnvironmentError('ViennaCL root path is incorrect. Cannot access ' + builtin_database_dir + '!\n'
|
||||||
|
'Your version of ViennaCL may be too old and/or corrupted.')
|
||||||
|
|
||||||
|
function_name_dict = { vcl.float32: 'add_4B',
|
||||||
|
vcl.float64: 'add_8B' }
|
||||||
|
|
||||||
|
additional_parameters_dict = {'N': "char_to_type<'N'>",
|
||||||
|
'T': "char_to_type<'T'>"}
|
||||||
|
|
||||||
|
#Create the device-specific headers
|
||||||
|
cpp_device_name = sanitize_string(device.name)
|
||||||
|
function_name = function_name_dict[datatype]
|
||||||
|
operation = operation.replace('-','_')
|
||||||
|
|
||||||
|
cpp_class_name = operation + '_template'
|
||||||
|
header_name = cpp_device_name + ".hpp"
|
||||||
|
function_declaration = 'inline void ' + function_name + '(' + ', '.join(['database_type<' + cpp_class_name + '::parameters_type> & db'] + \
|
||||||
|
[additional_parameters_dict[x] for x in additional_parameters]) + ')'
|
||||||
|
|
||||||
|
|
||||||
|
device_type_prefix = {
|
||||||
|
cl.device_type.GPU: 'gpu',
|
||||||
|
cl.device_type.CPU: 'cpu',
|
||||||
|
cl.device_type.ACCELERATOR: 'accelerator'
|
||||||
|
}[device.type]
|
||||||
|
vendor_prefix = {
|
||||||
|
vcl.opencl.VendorId.beignet_id: 'beignet',
|
||||||
|
vcl.opencl.VendorId.nvidia_id: 'nvidia',
|
||||||
|
vcl.opencl.VendorId.amd_id: 'amd',
|
||||||
|
vcl.opencl.VendorId.intel_id: 'intel'
|
||||||
|
}[device.vendor_id]
|
||||||
|
architecture_family = vcl.opencl.architecture_family(device.vendor_id, device.name)
|
||||||
|
|
||||||
|
header_hierarchy = ["devices", device_type_prefix, vendor_prefix, architecture_family]
|
||||||
|
header_directory = os.path.join(builtin_database_dir, *header_hierarchy)
|
||||||
|
header_path = os.path.join(header_directory, header_name)
|
||||||
|
|
||||||
|
if not os.path.exists(header_directory):
|
||||||
|
os.makedirs(header_directory)
|
||||||
|
|
||||||
|
if os.path.exists(header_path):
|
||||||
|
with open (header_path, "r") as myfile:
|
||||||
|
data=myfile.read()
|
||||||
|
else:
|
||||||
|
data = ''
|
||||||
|
|
||||||
|
if not data:
|
||||||
|
ifndef_suffix = ('_'.join(header_hierarchy) + '_hpp_').upper()
|
||||||
|
data = ('#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
|
||||||
|
'#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
|
||||||
|
'\n'
|
||||||
|
'#include "viennacl/device_specific/forwards.h"\n'
|
||||||
|
'#include "viennacl/device_specific/builtin_database/common.hpp"\n'
|
||||||
|
'\n'
|
||||||
|
'namespace viennacl{\n'
|
||||||
|
'namespace device_specific{\n'
|
||||||
|
'namespace builtin_database{\n'
|
||||||
|
'namespace devices{\n'
|
||||||
|
'namespace ' + device_type_prefix + '{\n'
|
||||||
|
'namespace ' + vendor_prefix + '{\n'
|
||||||
|
'namespace ' + architecture_family + '{\n'
|
||||||
|
'namespace ' + cpp_device_name + '{\n'
|
||||||
|
'\n'
|
||||||
|
'}\n'
|
||||||
|
'}\n'
|
||||||
|
'}\n'
|
||||||
|
'}\n'
|
||||||
|
'}\n'
|
||||||
|
'}\n'
|
||||||
|
'}\n'
|
||||||
|
'}\n'
|
||||||
|
'#endif\n'
|
||||||
|
'')
|
||||||
|
|
||||||
|
data = append_include(data, 'viennacl/device_specific/templates/' + cpp_class_name + '.hpp')
|
||||||
|
device_type = {
|
||||||
|
cl.device_type.GPU: 'CL_DEVICE_TYPE_GPU',
|
||||||
|
cl.device_type.CPU: 'CL_DEVICE_TYPE_CPU',
|
||||||
|
cl.device_type.ACCELERATOR: 'CL_DEVICE_TYPE_ACCELERATOR'
|
||||||
|
}[device.type]
|
||||||
|
add_to_database_arguments = [vendor_prefix + '_id', device_type, 'ocl::'+architecture_family,
|
||||||
|
'"' + device.name + '"', cpp_class_name + '::parameters_type(' + ','.join(map(str,parameters)) + ')']
|
||||||
|
core = ' db.' + function_name + '(' + ', '.join(add_to_database_arguments) + ');'
|
||||||
|
|
||||||
|
already_declared = data.find(function_declaration)
|
||||||
|
if already_declared==-1:
|
||||||
|
substr = 'namespace ' + cpp_device_name + '{\n'
|
||||||
|
insert_index = data.index(substr) + len(substr)
|
||||||
|
data = data[:insert_index] + '\n' + function_declaration + '\n{\n' + core + '\n}\n' + data[insert_index:]
|
||||||
|
else:
|
||||||
|
i1 = data.find('{', already_declared)
|
||||||
|
if data[i1-1]=='\n':
|
||||||
|
i1 = i1 - 1
|
||||||
|
i2 = data.find('}', already_declared) + 1
|
||||||
|
data = data[:i1] + '\n{\n' + core + '\n}' + data[i2:]
|
||||||
|
|
||||||
|
#Write the header file
|
||||||
|
with open(header_path, "w+") as myfile:
|
||||||
|
myfile.write(data)
|
||||||
|
|
||||||
|
#Updates the global ViennaCL headers
|
||||||
|
with open(os.path.join(builtin_database_dir, operation + '.hpp'), 'r+') as operation_header:
|
||||||
|
data = operation_header.read()
|
||||||
|
data = append_include(data, os.path.relpath(header_path, os.path.join(viennacl_root, os.pardir)))
|
||||||
|
|
||||||
|
scope_name = '_'.join(('init', operation) + additional_parameters)
|
||||||
|
scope = data.index(scope_name)
|
||||||
|
function_call = ' ' + '::'.join(header_hierarchy + [cpp_device_name, function_name]) + '(' + ', '.join(['result'] + [additional_parameters_dict[k] + '()' for k in additional_parameters]) + ')'
|
||||||
|
if function_call not in data:
|
||||||
|
insert_index = data.rindex('\n', 0, data.index('return result', scope))
|
||||||
|
data = data[:insert_index] + function_call + ';\n' + data[insert_index:]
|
||||||
|
|
||||||
|
operation_header.seek(0)
|
||||||
|
operation_header.truncate()
|
||||||
|
operation_header.write(data)
|
||||||
|
@@ -1,33 +0,0 @@
|
|||||||
import pyopencl as cl
|
|
||||||
import pyviennacl as vcl
|
|
||||||
|
|
||||||
all_devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
|
|
||||||
|
|
||||||
DEVICE_TYPE_PREFIX = { cl.device_type.GPU: 'gpu',
|
|
||||||
cl.device_type.CPU: 'cpu',
|
|
||||||
cl.device_type.ACCELERATOR: 'accelerator'
|
|
||||||
}
|
|
||||||
|
|
||||||
DEVICE_TYPE_CL_NAME = { cl.device_type.GPU: 'CL_DEVICE_TYPE_GPU',
|
|
||||||
cl.device_type.CPU: 'CL_DEVICE_TYPE_CPU',
|
|
||||||
cl.device_type.ACCELERATOR: 'CL_DEVICE_TYPE_ACCELERATOR'
|
|
||||||
}
|
|
||||||
|
|
||||||
VENDOR_PREFIX = { vcl.opencl.VendorId.beignet_id: 'beignet',
|
|
||||||
vcl.opencl.VendorId.nvidia_id: 'nvidia',
|
|
||||||
vcl.opencl.VendorId.amd_id: 'amd',
|
|
||||||
vcl.opencl.VendorId.intel_id: 'intel'
|
|
||||||
}
|
|
||||||
|
|
||||||
DEVICES_PRESETS = {'all': all_devices,
|
|
||||||
'gpus': [d for d in all_devices if d.type==cl.device_type.GPU],
|
|
||||||
'cpus': [d for d in all_devices if d.type==cl.device_type.CPU],
|
|
||||||
'accelerators': [d for d in all_devices if d.type==cl.device_type.ACCELERATOR]
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def sanitize_string(string, keep_chars = ['_']):
|
|
||||||
string = string.replace(' ', '_').lower()
|
|
||||||
string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
|
|
||||||
return string
|
|
@@ -1,116 +0,0 @@
|
|||||||
import sys
|
|
||||||
import os
|
|
||||||
import utils
|
|
||||||
|
|
||||||
def append_include(data, path):
|
|
||||||
include_name = '#include "' + path +'"\n'
|
|
||||||
already_included = data.find(include_name)
|
|
||||||
if already_included == -1:
|
|
||||||
insert_index = data.index('\n', data.index('#define')) + 1
|
|
||||||
return data[:insert_index] + '\n' + include_name + data[insert_index:]
|
|
||||||
return data
|
|
||||||
|
|
||||||
def generate_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters):
|
|
||||||
builtin_database_dir = os.path.join(viennacl_root, "device_specific", "builtin_database")
|
|
||||||
if not os.path.isdir(builtin_database_dir):
|
|
||||||
raise EnvironmentError('ViennaCL root path is incorrect. Cannot access ' + builtin_database_dir + '!\n'
|
|
||||||
'Your version of ViennaCL may be too old and/or corrupted.')
|
|
||||||
|
|
||||||
function_name_dict = { vcl.float32: 'add_4B',
|
|
||||||
vcl.float64: 'add_8B' }
|
|
||||||
|
|
||||||
additional_parameters_dict = {'N': "char_to_type<'N'>",
|
|
||||||
'T': "char_to_type<'T'>"}
|
|
||||||
|
|
||||||
#Create the device-specific headers
|
|
||||||
cpp_device_name = utils.sanitize_string(device.name)
|
|
||||||
function_name = function_name_dict[datatype]
|
|
||||||
operation = operation.replace('-','_')
|
|
||||||
|
|
||||||
cpp_class_name = operation + '_template'
|
|
||||||
header_name = cpp_device_name + ".hpp"
|
|
||||||
function_declaration = 'inline void ' + function_name + '(' + ', '.join(['database_type<' + cpp_class_name + '::parameters_type> & db'] + \
|
|
||||||
[additional_parameters_dict[x] for x in additional_parameters]) + ')'
|
|
||||||
|
|
||||||
device_type_prefix = utils.DEVICE_TYPE_PREFIX[device.type]
|
|
||||||
vendor_prefix = utils.VENDOR_PREFIX[device.vendor_id]
|
|
||||||
architecture_family = vcl.opencl.architecture_family(device.vendor_id, device.name)
|
|
||||||
|
|
||||||
header_hierarchy = ["devices", device_type_prefix, vendor_prefix, architecture_family]
|
|
||||||
header_directory = os.path.join(builtin_database_dir, *header_hierarchy)
|
|
||||||
header_path = os.path.join(header_directory, header_name)
|
|
||||||
|
|
||||||
if not os.path.exists(header_directory):
|
|
||||||
os.makedirs(header_directory)
|
|
||||||
|
|
||||||
if os.path.exists(header_path):
|
|
||||||
with open (header_path, "r") as myfile:
|
|
||||||
data=myfile.read()
|
|
||||||
else:
|
|
||||||
data = ''
|
|
||||||
|
|
||||||
if not data:
|
|
||||||
ifndef_suffix = ('_'.join(header_hierarchy) + '_hpp_').upper()
|
|
||||||
data = ('#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
|
|
||||||
'#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
|
|
||||||
'\n'
|
|
||||||
'#include "viennacl/device_specific/forwards.h"\n'
|
|
||||||
'#include "viennacl/device_specific/builtin_database/common.hpp"\n'
|
|
||||||
'\n'
|
|
||||||
'namespace viennacl{\n'
|
|
||||||
'namespace device_specific{\n'
|
|
||||||
'namespace builtin_database{\n'
|
|
||||||
'namespace devices{\n'
|
|
||||||
'namespace ' + device_type_prefix + '{\n'
|
|
||||||
'namespace ' + vendor_prefix + '{\n'
|
|
||||||
'namespace ' + architecture_family + '{\n'
|
|
||||||
'namespace ' + cpp_device_name + '{\n'
|
|
||||||
'\n'
|
|
||||||
'}\n'
|
|
||||||
'}\n'
|
|
||||||
'}\n'
|
|
||||||
'}\n'
|
|
||||||
'}\n'
|
|
||||||
'}\n'
|
|
||||||
'}\n'
|
|
||||||
'}\n'
|
|
||||||
'#endif\n'
|
|
||||||
'')
|
|
||||||
|
|
||||||
data = append_include(data, 'viennacl/device_specific/templates/' + cpp_class_name + '.hpp')
|
|
||||||
|
|
||||||
add_to_database_arguments = [vendor_prefix + '_id', utils.DEVICE_TYPE_CL_NAME[device.type], 'ocl::'+architecture_family,
|
|
||||||
'"' + device.name + '"', cpp_class_name + '::parameters' + str(parameters)]
|
|
||||||
core = ' db.' + function_name + '(' + ', '.join(add_to_database_arguments) + ');'
|
|
||||||
|
|
||||||
already_declared = data.find(function_declaration)
|
|
||||||
if already_declared==-1:
|
|
||||||
substr = 'namespace ' + cpp_device_name + '{\n'
|
|
||||||
insert_index = data.index(substr) + len(substr)
|
|
||||||
data = data[:insert_index] + '\n' + function_declaration + '\n{\n' + core + '\n}\n' + data[insert_index:]
|
|
||||||
else:
|
|
||||||
i1 = data.find('{', already_declared)
|
|
||||||
if data[i1-1]=='\n':
|
|
||||||
i1 = i1 - 1
|
|
||||||
i2 = data.find('}', already_declared) + 1
|
|
||||||
data = data[:i1] + '\n{\n' + core + '\n}' + data[i2:]
|
|
||||||
|
|
||||||
#Write the header file
|
|
||||||
with open(header_path, "w+") as myfile:
|
|
||||||
myfile.write(data)
|
|
||||||
|
|
||||||
#Updates the global ViennaCL headers
|
|
||||||
with open(os.path.join(builtin_database_dir, operation + '.hpp'), 'r+') as operation_header:
|
|
||||||
data = operation_header.read()
|
|
||||||
data = append_include(data, os.path.relpath(header_path, os.path.join(viennacl_root, os.pardir)))
|
|
||||||
|
|
||||||
scope_name = '_'.join(('init', operation) + additional_parameters)
|
|
||||||
scope = data.index(scope_name)
|
|
||||||
function_call = ' ' + '::'.join(header_hierarchy + [cpp_device_name, function_name]) + '(' + ', '.join(['result'] + [additional_parameters_dict[k] + '()' for k in additional_parameters]) + ')'
|
|
||||||
if function_call not in data:
|
|
||||||
insert_index = data.rindex('\n', 0, data.index('return result', scope))
|
|
||||||
data = data[:insert_index] + function_call + ';\n' + data[insert_index:]
|
|
||||||
|
|
||||||
operation_header.seek(0)
|
|
||||||
operation_header.truncate()
|
|
||||||
operation_header.write(data)
|
|
Reference in New Issue
Block a user