Auto-tuner : Initial push
This commit is contained in:
168
autotune/autotune.py
Normal file
168
autotune/autotune.py
Normal file
@@ -0,0 +1,168 @@
|
||||
from __future__ import division
|
||||
|
||||
import argparse
|
||||
import itertools
|
||||
import os
|
||||
|
||||
from external.configobj import ConfigObj
|
||||
|
||||
import pyopencl as cl
|
||||
import pyviennacl as vcl
|
||||
from pyviennacl import backend
|
||||
from pyviennacl import opencl
|
||||
from pyviennacl import atidlas
|
||||
|
||||
import utils
|
||||
import vclio
|
||||
import optimize
|
||||
import sys
|
||||
|
||||
# Element type for each precision name accepted in the configuration file.
DATATYPES = {
    'single': vcl.float32,
    'double': vcl.float64,
}

# Kernel template class used to tune each operation.
TYPES = {
    'vector-axpy': vcl.atidlas.VectorAxpyTemplate,
    'matrix-axpy': vcl.atidlas.MatrixAxpyTemplate,
    'reduction': vcl.atidlas.ReductionTemplate,
    'row-wise-reduction': vcl.atidlas.RowWiseReductionTemplate,
    'matrix-product': vcl.atidlas.MatrixProductTemplate,
}

# Names of the tunable parameters of each operation.  The order matches the
# per-parameter value lists returned by parameter_space().
PNAMES = {
    'vector-axpy': [
        'simd-width', 'local-size-0', 'num-groups-0', 'fetch',
    ],
    'matrix-axpy': [
        'simd-width', 'local-size-0', 'local-size-1',
        'num-groups-0', 'num-groups-1', 'fetch',
    ],
    'reduction': [
        'simd-width', 'local-size-0', 'num-groups-0', 'fetch',
    ],
    'row-wise-reduction': [
        'simd-width', 'local-size-0', 'local-size-1', 'num-groups-0', 'fetch',
    ],
    'matrix-product': [
        'simd-width', 'local-size-0', 'kL', 'local-size-1',
        'mS', 'kS', 'nS',
        'A-fetch-policy', 'B-fetch-policy',
        'local-fetch-size-0', 'local-fetch-size-1',
    ],
}

# (performance function, unit label) for each operation.  do_tuning() calls
# the function with x = [itemsize, sizes, t], where `sizes` is the list of
# problem sizes from the configuration and `t` is the fitness value handed
# back by the optimizer (an averaged wall-clock time in seconds, as measured
# by GeneticOperators.evaluate).
PERFINDEX = {
    'vector-axpy': (
        lambda x: 3*x[0]*x[1][0]/x[2]*1e-9, 'GB/s'),
    'matrix-axpy': (
        lambda x: 3*x[0]*x[1][0]*x[1][1]/x[2]*1e-9, 'GB/s'),
    'reduction': (
        lambda x: 2*x[0]*x[1][0]*x[1][1]/x[2]*1e-9, 'GB/s'),
    'row-wise-reduction': (
        lambda x: x[0]*x[1][0]*x[1][1]/x[2]*1e-9, 'GB/s'),
    'matrix-product': (
        lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9, 'GFLOPs/s'),
}
|
||||
|
||||
def parameter_space(operation):
    """Return the candidate values of every tunable parameter of `operation`.

    The result is a list of lists: one list of admissible values per
    parameter, in the same order as ``PNAMES[operation]``.

    :param operation: operation name, e.g. ``'vector-axpy'``.
    :returns: the list of per-parameter value lists, or ``None`` when the
        operation is not known.
    """
    simd = [1, 2, 4, 8]
    pow2_1D = [2**k for k in range(12)]
    pow2_2D = [2**k for k in range(10)]
    pow2_2D_unrolled = [2**k for k in range(6)]

    # Use the same enum as the rest of the tuner (TYPES and
    # GeneticOperators.repair both go through vcl.atidlas), so the values
    # produced here compare equal to the ones repair() tests against.
    FetchingPolicy = vcl.atidlas.FetchingPolicy
    fetch = [FetchingPolicy.FETCH_FROM_LOCAL,
             FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED,
             FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS]

    spaces = {
        'vector-axpy': [simd, pow2_1D, pow2_1D, fetch],
        'reduction': [simd, pow2_1D, pow2_1D, fetch],
        'matrix-axpy': [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D, fetch],
        'row-wise-reduction': [simd, pow2_2D, pow2_2D, pow2_1D, fetch],
        'matrix-product': [simd, pow2_2D, pow2_2D, pow2_2D,
                           pow2_2D_unrolled, pow2_2D_unrolled, pow2_2D_unrolled,
                           fetch, fetch, pow2_2D, pow2_2D],
    }
    # Unknown operations yield None, as the original if-chain did.
    return spaces.get(operation)
|
||||
|
||||
def do_tuning(config_fname, spec_fname, viennacl_root):
    """Tune every operation that has a section in the configuration file.

    For each such operation, every requested (precision, device) pair is
    tuned through ``optimize.genetic``.  When the optimizer returns a truthy
    result and `viennacl_root` is non-empty, the ViennaCL headers are updated
    through ``vclio.generate_viennacl_headers``.

    :param config_fname: path of the ConfigObj configuration file.
    :param spec_fname: path of the configspec handed to ConfigObj.
    :param viennacl_root: root of the ViennaCL tree to update, or an empty
        string to skip header generation.
    """
    config = ConfigObj(config_fname, configspec=spec_fname)

    def map_to_list(T):
        """Apply T[0] to T[1], treating a scalar T[1] as a one-item list."""
        convert, value = T
        return list(map(convert, value if isinstance(value, list) else [value]))

    # Does not depend on the operation, so it is read once.
    tmp_folder = config['tmp-folder'] if 'tmp-folder' in config else ""

    for operation in ['vector-axpy', 'matrix-axpy', 'row-wise-reduction', 'matrix-product']:
        if operation not in config:
            continue
        p = config[operation]

        # A configuration value is a plain string when it holds one item and
        # a list when it holds several; both forms are normalised here.
        # Testing a list against the preset dict would raise (unhashable),
        # and iterating a string would split it into characters.
        confdevices = p['devices']
        if not isinstance(confdevices, list) and confdevices in utils.DEVICES_PRESETS:
            devices = utils.DEVICES_PRESETS[confdevices]
        else:
            devices = [utils.all_devices[i] for i in map_to_list((int, confdevices))]

        precisions = map_to_list((str, p['precision']))
        if 'all' in precisions:
            precisions = ['single', 'double']
        datatypes = [DATATYPES[k] for k in precisions]

        s = map_to_list((int, p['size']))

        for datatype, device in itertools.product(datatypes, devices):
            ctx = cl.Context([device])
            ctx = vcl.backend.Context(ctx)
            device = ctx.current_device

            if datatype is vcl.float64 and not device.double_fp_config:
                sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...\n')
                continue

            def execute(node, other_params):
                """Tune the expression `node` for the current operation.

                `other_params` are the extra template arguments (the
                transposition flags); they are also used to label the
                console output and the history file.
                """
                print('-----')
                print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')]'))))
                tmp_file = os.path.join(tmp_folder, utils.sanitize_string(device.name) + "-" + datatype.__name__ + "-" + operation + '-'.join(other_params) + ".dat")
                if tmp_folder:
                    print('Saving history to ' + tmp_file)
                    fname = tmp_file
                else:
                    # No history folder configured: discard the history.
                    fname = os.devnull
                with open(fname, "w+") as archive:
                    with vcl.Statement(node) as statement:
                        result = optimize.genetic(
                            statement, ctx, TYPES[operation],
                            lambda p: TYPES[operation](p, *other_params),
                            PNAMES[operation], parameter_space(operation),
                            lambda t: PERFINDEX[operation][0]([datatype().itemsize, s, t]),
                            PERFINDEX[operation][1], archive)
                if result and viennacl_root:
                    vclio.generate_viennacl_headers(viennacl_root, device, datatype, operation, other_params, result[1])

            if operation == 'vector-axpy':
                x = vcl.Vector(s[0], context=ctx, dtype=datatype)
                y = vcl.Vector(s[0], context=ctx, dtype=datatype)
                execute(vcl.ElementProd(vcl.exp(x + y), vcl.cos(x + y)), ())

            elif operation == 'matrix-axpy':
                A = vcl.Matrix(s, context=ctx, dtype=datatype)
                B = vcl.Matrix(s, context=ctx, dtype=datatype)
                execute(A+B, ())

            elif operation == 'row-wise-reduction':
                layouts = map_to_list((str, p['layout']))
                if 'all' in layouts:
                    layouts = ['N', 'T']
                for A_trans in layouts:
                    A = vcl.Matrix(s if A_trans == 'N' else s[::-1], context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
                    x = vcl.Vector(s[1] if A_trans == 'N' else s[0], context=ctx, dtype=datatype)
                    LHS = A if A_trans == 'N' else A.T
                    execute(LHS*x, (A_trans,))

            elif operation == 'matrix-product':
                layouts = map_to_list((str, p['layout']))
                if 'all' in layouts:
                    layouts = ['NN', 'NT', 'TN', 'TT']
                for layout in layouts:
                    A_trans = layout[0]
                    B_trans = layout[1]

                    A = vcl.Matrix((s[0], s[1]) if A_trans == 'N' else (s[1], s[0]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
                    B = vcl.Matrix((s[1], s[2]) if B_trans == 'N' else (s[2], s[1]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
                    LHS = A if A_trans == 'N' else A.T
                    RHS = B if B_trans == 'N' else B.T
                    alpha = vcl.HostScalar(1.0, context=ctx, dtype=datatype)
                    beta = vcl.HostScalar(1.0, context=ctx, dtype=datatype)
                    C = vcl.Matrix((s[0], s[2]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
                    execute(vcl.Assign(C, LHS*RHS*alpha + C*beta), (A_trans, B_trans))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command line: `list-devices` prints the OpenCL devices and their
    # indices; `tune` runs the auto-tuner on a configuration file.
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='action')

    subparsers.add_parser('list-devices', help='list the devices available')

    tune_parser = subparsers.add_parser('tune', help='tune using a specific configuration file')
    tune_parser.add_argument("--config", default="config.ini", required=False, type=str)
    tune_parser.add_argument("--viennacl-root", default='', required=False, type=str)

    args = parser.parse_args()

    if args.action == 'list-devices':
        print("----------------")
        print("Devices available:")
        print("----------------")
        # utils.all_devices is the list do_tuning() indexes into, so the
        # numbers printed here are the ones a configuration must use.
        for i, d in enumerate(utils.all_devices):
            # Build one string: a multi-argument print(...) would print a
            # tuple when `print` is still a statement.
            print(' '.join(map(str, ('Device', i, ':', utils.DEVICE_TYPE_PREFIX[d.type].upper() + ':', d.name, 'on', d.platform.name))))
        print("----------------")
    elif args.action == 'tune':
        print("------")
        print("Auto-tuning")
        print("------")
        do_tuning(args.config, 'config_spec.ini', args.viennacl_root)
    else:
        # Reached when no sub-command was given on interpreters where
        # sub-commands are optional; `args` then has no `config` attribute.
        parser.print_help()
|
126
autotune/genetic_operators.py
Normal file
126
autotune/genetic_operators.py
Normal file
@@ -0,0 +1,126 @@
|
||||
import random
|
||||
import time
|
||||
import pyviennacl as vcl
|
||||
|
||||
from collections import OrderedDict as odict
|
||||
|
||||
def closest_divisor(N, x):
    """Return the divisor of N that lies nearest to x.

    The search starts from x rounded and clamped to [1, N], then walks
    downwards and upwards until a divisor of N is met on each side.  When
    both candidates are equally far from x, the upper one is returned.
    """
    lower = upper = max(1, min(round(x), N))

    # Walk down to the nearest divisor at or below the starting point.
    while N % lower > 0 and lower > 0:
        lower -= 1

    # Walk up to the nearest divisor at or above it (N at the latest).
    while N % upper > 0 and upper < N:
        upper += 1

    if x - lower < upper - x:
        return lower
    return upper
|
||||
|
||||
class GeneticOperators(object):
    """Initialisation, repair, mutation and evaluation operators used by the
    genetic search over kernel-template parameters.

    An individual is a list holding one value per entry of
    `parameter_names`, each drawn from the matching list in `parameters`.
    """

    def __init__(self, device, statement, parameters, parameter_names, TemplateType, build_template):
        """Store the tuning context.

        :param device: device the kernels run on; its
            ``max_work_group_size`` bounds the local sizes in repair().
        :param statement: statement the templates are checked against and
            executed on.
        :param parameters: list of per-parameter candidate value lists.
        :param parameter_names: names matching `parameters` position-wise.
        :param TemplateType: template class; its ``Parameters`` attribute
            builds a parameter object from an individual.
        :param build_template: callable turning a parameter object into a
            template instance.
        """
        self.device = device
        self.statement = statement
        self.parameters = parameters
        self.parameter_names = parameter_names
        self.TemplateType = TemplateType
        self.ParameterType = TemplateType.Parameters
        self.build_template = build_template
        # Maps tuple(individual) -> measured fitness, so that an individual
        # is never benchmarked twice.
        self.cache = {}

    def init(self):
        """Draw random individuals until one passes the template check."""
        result = [random.choice(L) for L in self.parameters]
        while self.build_template(self.TemplateType.Parameters(*result)).check(self.statement) != 0:
            result = [random.choice(L) for L in self.parameters]
        return result

    @staticmethod
    def min_to_hyperbol(a, tup):
        """Return an integer pair (x, y) with x*y == a that is close to `tup`.

        Newton's method minimises the squared distance between `tup` and
        the point (x, a/x); the result is then snapped to a divisor of `a`.
        """
        # Start from a float so every division below is a true division,
        # whichever interpreter runs this module.
        x = 1.0
        for i in range(100):
            dx = 2*(-a**2/x**3 + a*tup[1]/x**2 - tup[0] + x)
            ddx = 6*a**2/x**4 - 4*a*tup[1]/x**3 + 2
            if abs(dx) < 1e-7 or abs(ddx) < 1e-7:
                break
            x -= dx/ddx
            if x < 1 or x > a:
                # Left the admissible range: clamp and stop iterating.
                x = max(1, min(x, a))
                break
        new_x = int(closest_divisor(a, x))
        new_y = a // new_x  # exact, since new_x divides a
        return (new_x, new_y)

    def repair(self, func):
        """Decorate `func` so every individual it returns is made consistent.

        `func` must return an iterable of individuals; each one is repaired
        in place and the original return value is passed through.
        """
        def repair_impl(child):
            D = odict(zip(self.parameter_names, child))
            dummy_template = self.build_template(self.ParameterType(*D.values()))
            FetchingPolicy = vcl.atidlas.FetchingPolicy

            # Keep the work-group size within the device limit.
            if 'local-size-1' not in D:
                D['local-size-0'] = min(D['local-size-0'], self.device.max_work_group_size)
            elif D['local-size-0']*D['local-size-1'] > self.device.max_work_group_size:
                res = GeneticOperators.min_to_hyperbol(self.device.max_work_group_size, (D['local-size-0'], D['local-size-1']))
                D['local-size-0'] = res[0]
                D['local-size-1'] = res[1]

            if self.ParameterType is vcl.atidlas.MatrixProductTemplate.Parameters:
                if dummy_template.A_trans != 'N' and dummy_template.B_trans != 'T':
                    D['simd-width'] = 1

                # mS and nS become non-zero multiples of the SIMD width.
                D['mS'] = max(D['mS'], D['simd-width'])
                D['mS'] = D['mS'] - D['mS'] % D['simd-width']

                D['nS'] = max(D['nS'], D['simd-width'])
                D['nS'] = D['nS'] - D['nS'] % D['simd-width']

                if D['A-fetch-policy'] != FetchingPolicy.FETCH_FROM_LOCAL and D['B-fetch-policy'] != FetchingPolicy.FETCH_FROM_LOCAL:
                    D['local-fetch-size-0'] = D['local-fetch-size-1'] = 0
                else:
                    # The local fetch sizes must multiply to the group size.
                    res = GeneticOperators.min_to_hyperbol(D['local-size-0']*D['local-size-1'], (D['local-fetch-size-0'], D['local-fetch-size-1']))
                    D['local-fetch-size-0'] = res[0]
                    D['local-fetch-size-1'] = res[1]

                # Round kL to a multiple of local-fetch-size-1.  float() and
                # int() keep the result an integer obtained by rounding to
                # nearest, independently of the interpreter's `/` and
                # round() semantics.
                if D['A-fetch-policy'] == FetchingPolicy.FETCH_FROM_LOCAL and dummy_template.A_trans == 'N' and D['kL'] % D['local-fetch-size-1'] > 0:
                    D['kL'] = max(1, int(round(float(D['kL'])/D['local-fetch-size-1'])))*D['local-fetch-size-1']

                if D['B-fetch-policy'] == FetchingPolicy.FETCH_FROM_LOCAL and dummy_template.B_trans == 'T' and D['kL'] % D['local-fetch-size-1'] > 0:
                    D['kL'] = max(1, int(round(float(D['kL'])/D['local-fetch-size-1'])))*D['local-fetch-size-1']

                D['kS'] = min(D['kL'], D['kS'])

            # A real list: the caller indexes the result.
            return list(D.values())

        def wrapper(*args, **kargs):
            offspring = func(*args, **kargs)
            for child in offspring:
                # Write back element-wise so `child` keeps its identity and
                # any attributes attached to it.
                for i, value in enumerate(repair_impl(child)):
                    if child[i] != value:
                        child[i] = value
            return offspring
        return wrapper

    def mutate(self, individual, indpb):
        """Resample each gene with probability `indpb`.

        :returns: a one-element tuple holding the mutated individual.
        """
        for i in range(len(individual)):
            if random.random() < indpb:
                individual[i] = random.choice(self.parameters[i])
        return individual,

    def evaluate(self, individual):
        """Return the fitness of `individual` as a one-element tuple.

        Individuals whose template check is non-zero get the fixed value
        100; the others get the wall-clock time of one execution, averaged
        over at least 1e-2 s of repeated runs.  Results are cached.
        """
        tupindividual = tuple(individual)
        print(tupindividual)
        if tupindividual not in self.cache:
            template = self.build_template(self.TemplateType.Parameters(*individual))
            if template.check(self.statement) != 0:
                self.cache[tupindividual] = 100
            else:
                # First run is not timed.
                # NOTE(review): the True flag presumably forces a fresh
                # compilation - confirm against the template API.
                template.execute(self.statement, True)
                self.statement.result.context.finish_all_queues()
                N = 0
                current_time = 0
                while current_time < 1e-2:
                    time_before = time.time()
                    template.execute(self.statement, False)
                    self.statement.result.context.finish_all_queues()
                    current_time += time.time() - time_before
                    N += 1
                self.cache[tupindividual] = current_time/N
        return self.cache[tupindividual],
|
92
autotune/optimize.py
Normal file
92
autotune/optimize.py
Normal file
@@ -0,0 +1,92 @@
|
||||
import array
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
from deap import algorithms
|
||||
from deap import base
|
||||
from deap import creator
|
||||
from deap import tools
|
||||
|
||||
from genetic_operators import GeneticOperators
|
||||
|
||||
def eaMuPlusLambda(population, toolbox, mu, lambda_, cxpb, mutpb, maxtime,
                   stats=None, halloffame=None, verbose=__debug__):
    """Run a (mu + lambda) evolutionary loop bounded by wall-clock time.

    :param population: initial list of individuals; updated in place.
    :param toolbox: toolbox providing ``map``, ``evaluate`` and ``select``
        (plus whatever ``algorithms.varOr`` needs).
    :param mu: number of individuals selected for the next generation.
    :param lambda_: number of children produced per generation.
    :param cxpb: crossover probability handed to ``varOr``.
    :param mutpb: mutation probability handed to ``varOr``.
    :param maxtime: time budget as a string parsed with the format
        ``'%Mm%Ss'``, e.g. ``'5m0s'``.
    :param stats: optional statistics object compiled each generation.
    :param halloffame: optional hall of fame updated each generation.
    :param verbose: when true, print the logbook stream each generation.
    :returns: ``(population, logbook)``.
    """
    def evaluate_invalid(individuals):
        """Evaluate the individuals lacking a valid fitness; return their count."""
        invalid_ind = [ind for ind in individuals if not ind.fitness.valid]
        fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
        return len(invalid_ind)

    logbook = tools.Logbook()
    logbook.header = ['gen', 'nevals'] + (stats.fields if stats else [])

    # Generation 0: evaluate the initial population.
    nevals = evaluate_invalid(population)

    if halloffame is not None:
        halloffame.update(population)

    record = stats.compile(population) if stats is not None else {}
    logbook.record(gen=0, nevals=nevals, **record)
    if verbose:
        print(logbook.stream)

    # Begin the generational process
    gen = 0
    maxtime = time.strptime(maxtime, '%Mm%Ss')
    maxtime = maxtime.tm_min*60 + maxtime.tm_sec
    start_time = time.time()
    while time.time() - start_time < maxtime:
        # Vary the population
        offspring = algorithms.varOr(population, toolbox, lambda_, cxpb, mutpb)

        # Evaluate the children that do not have a fitness yet
        nevals = evaluate_invalid(offspring)

        # Update the hall of fame with the generated individuals
        if halloffame is not None:
            halloffame.update(offspring)

        # Select the next generation population
        population[:] = toolbox.select(population + offspring, mu)

        # Update the statistics with the new population
        gen = gen + 1
        record = stats.compile(population) if stats is not None else {}
        logbook.record(gen=gen, nevals=nevals, **record)
        if verbose:
            print(logbook.stream)

    return population, logbook
|
||||
|
||||
def genetic(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out):
    """Search the template parameters of `statement` with a genetic algorithm.

    :param statement: statement to tune.
    :param context: context whose first device (``context.devices[0]``) is
        used to bound the parameters.
    :param TemplateType: template class being tuned.
    :param build_template: callable building a template from a parameter
        object.
    :param parameter_names: names of the tunable parameters.
    :param all_parameters: per-parameter lists of candidate values.
    :param compute_perf: maps a fitness value (a time) to a performance
        figure, used for reporting only.
    :param perf_metric: unit label shown next to that figure.
    :param out: currently unused.

    NOTE(review): nothing is returned, whereas the caller in autotune.py
    tests the result and reads ``result[1]``.  The intended return value is
    not visible from this file, so none has been invented here - confirm
    and add it.
    """
    gen = GeneticOperators(context.devices[0], statement, all_parameters, parameter_names, TemplateType, build_template)

    # Single-objective minimisation: the fitness is an execution time.
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMin)

    toolbox = base.Toolbox()
    toolbox.register("individual", tools.initIterate, creator.Individual, gen.init)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.decorate("population", gen.repair)
    toolbox.register("evaluate", gen.evaluate)
    toolbox.register("mate", tools.cxUniform, indpb=0.3)
    toolbox.decorate("mate", gen.repair)
    toolbox.register("mutate", gen.mutate, indpb=0.2)
    toolbox.decorate("mutate", gen.repair)
    toolbox.register("select", tools.selNSGA2)

    pop = toolbox.population(n=10)
    hof = tools.HallOfFame(1)

    # Both statistics report on the best individual found so far (hof[0])
    # and ignore the per-individual values they receive.
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("max (" + perf_metric + ")", lambda x: compute_perf(hof[0].fitness.values[0]))
    stats.register("profile ", lambda x: '(%s)' % ','.join(map(str, hof[0])))

    eaMuPlusLambda(pop, toolbox, 10, 20, cxpb=0.2, mutpb=0.2, maxtime='5m0s', stats=stats, halloffame=hof, verbose=True)
|
33
autotune/utils.py
Normal file
33
autotune/utils.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import pyopencl as cl
|
||||
import pyviennacl as vcl
|
||||
|
||||
# Every OpenCL device of every platform, in platform order.  The position
# of a device in this list is the index used to select it.
all_devices = [
    d
    for platform in cl.get_platforms()
    for d in platform.get_devices()
]

# Short lower-case name of each device type.
DEVICE_TYPE_PREFIX = {
    cl.device_type.GPU: 'gpu',
    cl.device_type.CPU: 'cpu',
    cl.device_type.ACCELERATOR: 'accelerator',
}

# Name of the OpenCL constant for each device type, as written into the
# generated headers.
DEVICE_TYPE_CL_NAME = {
    cl.device_type.GPU: 'CL_DEVICE_TYPE_GPU',
    cl.device_type.CPU: 'CL_DEVICE_TYPE_CPU',
    cl.device_type.ACCELERATOR: 'CL_DEVICE_TYPE_ACCELERATOR',
}

# Short lower-case name of each known vendor id.
VENDOR_PREFIX = {
    vcl.opencl.VendorId.beignet_id: 'beignet',
    vcl.opencl.VendorId.nvidia_id: 'nvidia',
    vcl.opencl.VendorId.amd_id: 'amd',
    vcl.opencl.VendorId.intel_id: 'intel',
}

# Named groups of devices that a configuration may use instead of indices.
DEVICES_PRESETS = {
    'all': all_devices,
    'gpus': [d for d in all_devices if d.type == cl.device_type.GPU],
    'cpus': [d for d in all_devices if d.type == cl.device_type.CPU],
    'accelerators': [d for d in all_devices if d.type == cl.device_type.ACCELERATOR],
}
|
||||
|
||||
|
||||
|
||||
def sanitize_string(string, keep_chars=('_',)):
    """Turn `string` into a lower-case identifier-like name.

    Spaces become underscores, the text is lower-cased, and every character
    that is neither alphanumeric nor listed in `keep_chars` is dropped.

    :param string: text to sanitise, e.g. a device name.
    :param keep_chars: non-alphanumeric characters to keep; any container
        supporting ``in`` is accepted.
    :returns: the sanitised string.
    """
    # The default is an immutable tuple so it cannot be altered between calls.
    lowered = string.replace(' ', '_').lower()
    return "".join(c for c in lowered if c.isalnum() or c in keep_chars).rstrip()
|
116
autotune/vclio.py
Normal file
116
autotune/vclio.py
Normal file
@@ -0,0 +1,116 @@
|
||||
import sys
|
||||
import os
|
||||
import utils
|
||||
|
||||
def append_include(data, path):
    """Return `data` with an ``#include "path"`` line added if it is missing.

    The new line goes right after the line holding the first ``#define``
    of `data`, preceded by a blank line.  `data` is returned unchanged when
    the include is already present.  ``str.index`` raises ``ValueError``
    when `data` holds no ``#define`` or no newline after it.
    """
    directive = '#include "' + path + '"\n'
    if directive in data:
        return data

    define_at = data.index('#define')
    cut = data.index('\n', define_at) + 1
    return ''.join((data[:cut], '\n', directive, data[cut:]))
|
||||
|
||||
def generate_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters):
    """Record a tuned parameter set in the ViennaCL built-in database headers.

    Two files under ``<viennacl_root>/device_specific/builtin_database`` are
    touched: the device-specific header (created when missing), in which
    the ``add_4B``/``add_8B`` function for this operation is added or
    replaced, and ``<operation>.hpp``, into which an include of that header
    and a call to that function are inserted.

    :param viennacl_root: root of the ViennaCL source tree.
    :param device: device the parameters were tuned for.
    :param datatype: ``vcl.float32`` or ``vcl.float64``.
    :param operation: operation name, e.g. ``'matrix-product'``.
    :param additional_parameters: sequence of ``'N'``/``'T'`` flags.
    :param parameters: tuned parameters; their ``str()`` is appended
        verbatim after ``::parameters`` in the generated C++.
    :raises EnvironmentError: when the built-in database directory does not
        exist under `viennacl_root`.
    """
    # Imported here because this module has no file-level pyviennacl import,
    # which left the name `vcl` used below undefined.
    import pyviennacl as vcl

    # Accept any sequence; a tuple is needed for the concatenation below.
    additional_parameters = tuple(additional_parameters)

    builtin_database_dir = os.path.join(viennacl_root, "device_specific", "builtin_database")
    if not os.path.isdir(builtin_database_dir):
        raise EnvironmentError('ViennaCL root path is incorrect. Cannot access ' + builtin_database_dir + '!\n'
                               'Your version of ViennaCL may be too old and/or corrupted.')

    function_name_dict = {vcl.float32: 'add_4B',
                          vcl.float64: 'add_8B'}

    additional_parameters_dict = {'N': "char_to_type<'N'>",
                                  'T': "char_to_type<'T'>"}

    # Create the device-specific headers
    cpp_device_name = utils.sanitize_string(device.name)
    function_name = function_name_dict[datatype]
    operation = operation.replace('-', '_')

    cpp_class_name = operation + '_template'
    header_name = cpp_device_name + ".hpp"
    function_declaration = 'inline void ' + function_name + '(' + ', '.join(
        ['database_type<' + cpp_class_name + '::parameters_type> & db'] +
        [additional_parameters_dict[x] for x in additional_parameters]) + ')'

    device_type_prefix = utils.DEVICE_TYPE_PREFIX[device.type]
    vendor_prefix = utils.VENDOR_PREFIX[device.vendor_id]
    architecture_family = vcl.opencl.architecture_family(device.vendor_id, device.name)

    header_hierarchy = ["devices", device_type_prefix, vendor_prefix, architecture_family]
    header_directory = os.path.join(builtin_database_dir, *header_hierarchy)
    header_path = os.path.join(header_directory, header_name)

    if not os.path.exists(header_directory):
        os.makedirs(header_directory)

    if os.path.exists(header_path):
        with open(header_path, "r") as myfile:
            data = myfile.read()
    else:
        data = ''

    if not data:
        # Fresh header: include guard, common includes and one namespace
        # per hierarchy level, the innermost being the device itself.
        # NOTE(review): the guard is built from the hierarchy only, without
        # the device name, so two devices of one family get the same guard
        # - confirm this is intended.
        ifndef_suffix = ('_'.join(header_hierarchy) + '_hpp_').upper()
        guard = 'VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix
        namespaces = ['viennacl', 'device_specific', 'builtin_database', 'devices',
                      device_type_prefix, vendor_prefix, architecture_family, cpp_device_name]
        data = ('#ifndef ' + guard + '\n' +
                '#define ' + guard + '\n' +
                '\n' +
                '#include "viennacl/device_specific/forwards.h"\n' +
                '#include "viennacl/device_specific/builtin_database/common.hpp"\n' +
                '\n' +
                ''.join('namespace ' + ns + '{\n' for ns in namespaces) +
                '\n' +
                '}\n' * len(namespaces) +
                '#endif\n')

    data = append_include(data, 'viennacl/device_specific/templates/' + cpp_class_name + '.hpp')

    add_to_database_arguments = [vendor_prefix + '_id', utils.DEVICE_TYPE_CL_NAME[device.type], 'ocl::' + architecture_family,
                                 '"' + device.name + '"', cpp_class_name + '::parameters' + str(parameters)]
    core = '  db.' + function_name + '(' + ', '.join(add_to_database_arguments) + ');'

    already_declared = data.find(function_declaration)
    if already_declared == -1:
        # New function: insert it right after the device namespace opens.
        substr = 'namespace ' + cpp_device_name + '{\n'
        insert_index = data.index(substr) + len(substr)
        data = data[:insert_index] + '\n' + function_declaration + '\n{\n' + core + '\n}\n' + data[insert_index:]
    else:
        # Existing function: replace its body, from the opening brace (and
        # the newline before it, if any) up to the first closing brace.
        i1 = data.find('{', already_declared)
        if data[i1-1] == '\n':
            i1 = i1 - 1
        i2 = data.find('}', already_declared) + 1
        data = data[:i1] + '\n{\n' + core + '\n}' + data[i2:]

    # Write the header file
    with open(header_path, "w+") as myfile:
        myfile.write(data)

    # Updates the global ViennaCL headers
    with open(os.path.join(builtin_database_dir, operation + '.hpp'), 'r+') as operation_header:
        data = operation_header.read()
        data = append_include(data, os.path.relpath(header_path, os.path.join(viennacl_root, os.pardir)))

        scope_name = '_'.join(('init', operation) + additional_parameters)
        scope = data.index(scope_name)
        function_call = '  ' + '::'.join(header_hierarchy + [cpp_device_name, function_name]) + '(' + ', '.join(
            ['result'] + [additional_parameters_dict[k] + '()' for k in additional_parameters]) + ')'
        if function_call not in data:
            # Insert in front of the newline that precedes the first
            # `return result` found after the init function's name.
            insert_index = data.rindex('\n', 0, data.index('return result', scope))
            data = data[:insert_index] + function_call + ';\n' + data[insert_index:]

        # Rewrite the file in place.
        operation_header.seek(0)
        operation_header.truncate()
        operation_header.write(data)
|
Reference in New Issue
Block a user