Added viennacl-src-path in the UI
This commit is contained in:
@@ -12,11 +12,11 @@ from model import train_model
|
||||
|
||||
|
||||
TYPES = { 'vector-axpy': {'template':atd.VectorAxpyTemplate,
|
||||
'perf-index':lambda x: 2*x[0]*x[1][0]/x[2]*1e-9,
|
||||
'perf-index':lambda x: 3*x[0]*x[1][0]/x[2]*1e-9,
|
||||
'perf-measure':'GB/s'},
|
||||
|
||||
'matrix-axpy': {'template':atd.MatrixAxpyTemplate,
|
||||
'perf-index':lambda x: 2*x[0]*x[1][0]*x[1][1]/x[2]*1e-9,
|
||||
'perf-index':lambda x: 3*x[0]*x[1][0]*x[1][1]/x[2]*1e-9,
|
||||
'perf-measure':'GB/s'},
|
||||
|
||||
'reduction': {'template':atd.ReductionTemplate,
|
||||
@@ -53,7 +53,7 @@ def do_tuning(args):
|
||||
|
||||
for datatype in [vcl.float32, vcl.float64]:
|
||||
|
||||
if any(x in args.exclude_operations for x in [operation, operation + '-' + datatype.__name__]):
|
||||
if not any(x in args.operations for x in [operation + '-' + datatype.__name__]):
|
||||
continue
|
||||
|
||||
ctx = cl.Context([device])
|
||||
@@ -106,10 +106,13 @@ def do_tuning(args):
|
||||
def compute_perf(x, t):
|
||||
return TYPES[operation]['perf-index']([datatype().itemsize, x, t])
|
||||
profiles_generator = log_space_gen_product(a, b, args.sample_size, dimsample)
|
||||
profiles = dataset.sample_profiles(execution_handler, profiles_generator)
|
||||
# profiles = dataset.sample_profiles(execution_handler, profiles_generator)
|
||||
if args.build_model:
|
||||
dataset_generator = log_space_gen_product(a, b, 1000, dimsample)
|
||||
X, Y, profiles = dataset.sample_dataset(os.path.join(full_operation,datatype.__name__), profiles, execution_handler, dataset_generator)
|
||||
# X, Y, profiles = dataset.sample_dataset(os.path.join(full_operation,datatype.__name__), profiles, execution_handler, dataset_generator)
|
||||
profiles = np.loadtxt('data/vector-axpy/float32/profiles.csv')
|
||||
X = np.loadtxt('data/vector-axpy/float32/X.csv',ndmin=2)
|
||||
Y = np.loadtxt('data/vector-axpy/float32/Y.csv',ndmin=2)
|
||||
clf = train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
|
||||
D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
|
||||
'children_right': e.tree_.children_right.tolist(),
|
||||
@@ -125,9 +128,9 @@ def do_tuning(args):
|
||||
if operation=='vector-axpy':
|
||||
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||
x = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
|
||||
z = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
|
||||
return execute(device, vcl.Assign(z, x), (), sizes, fname, parameters)
|
||||
tune(execution_handler, 1e4, 1e7, 1, ())
|
||||
y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
|
||||
return execute(device, vcl.Assign(y, x + y), (), sizes, fname, parameters)
|
||||
tune(execution_handler, 1e4, 2e7, 1, ())
|
||||
#Reduction
|
||||
if operation=='reduction':
|
||||
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||
@@ -135,13 +138,13 @@ def do_tuning(args):
|
||||
y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
|
||||
s = vcl.Scalar(0, context=ctx, dtype=datatype)
|
||||
return execute(device, vcl.Assign(s, vcl.Dot(x,y)), (), sizes, fname, parameters)
|
||||
tune(execution_handler, 1e4, 1e7, 1, ())
|
||||
tune(execution_handler, 1e4, 2e7, 1, ())
|
||||
#Matrix AXPY
|
||||
if operation=='matrix-axpy':
|
||||
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||
A = vcl.Matrix(sizes, context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
|
||||
C = vcl.Matrix(sizes, context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
|
||||
return execute(device, vcl.Assign(C,A), (), sizes, fname, parameters)
|
||||
return execute(device, vcl.Assign(C,A + C), (), sizes, fname, parameters)
|
||||
tune(execution_handler, 100, 4000, 2, ())
|
||||
#Row-wise reduction
|
||||
if operation=='row-wise-reduction':
|
||||
@@ -178,52 +181,53 @@ class ArgumentsHandler:
|
||||
|
||||
def __init__(self):
|
||||
|
||||
#Command line arguments
|
||||
parent_parser = argparse.ArgumentParser('parent', add_help=False)
|
||||
parent_parser.add_argument('--version', action='version', version='%(prog)s 2.0')
|
||||
|
||||
parser = argparse.ArgumentParser(parents=[parent_parser])
|
||||
subparsers = parser.add_subparsers(dest='action')
|
||||
print_devices_parser = subparsers.add_parser('list-devices', help='List the devices available', parents=[parent_parser])
|
||||
tune_parser = subparsers.add_parser('tune', help='Auto-tuning', parents=[parent_parser])
|
||||
tune_parser.add_argument("--device", default=0, type=int)
|
||||
tune_parser.add_argument("--exclude-operations", default = '', type=str)
|
||||
tune_parser.add_argument("--gemm-layouts", default='NN,NT,TN,TT', type=str)
|
||||
tune_parser.add_argument("--gemv-layouts", default='N,T', type=str)
|
||||
tune_parser.add_argument("--json-file", default='', type=str)
|
||||
tune_parser.add_argument("--viennacl-src-path", default='', type=str)
|
||||
|
||||
tune_subparsers = tune_parser.add_subparsers(dest='method')
|
||||
simple_parser = tune_subparsers.add_parser('simple', help = 'Tune each operation for unique sizes')
|
||||
|
||||
simple_parser.add_argument("--blas1-size", default = 10e6, type=int)
|
||||
simple_parser.add_argument("--blas2-size", nargs=2, default=[2560,2560], type=int)
|
||||
simple_parser.add_argument("--blas3-size", nargs=3, default=[1536,1536,1536],type=int)
|
||||
|
||||
full_parser = tune_subparsers.add_parser('full', help = 'Tune each operation for randomly chosen sizes')
|
||||
full_parser.add_argument("--build-model", default=False, type=bool)
|
||||
full_parser.add_argument("--sample-size", default=30, type=int)
|
||||
|
||||
args = parent_parser.parse_args()
|
||||
self.__dict__ = args.__dict__.copy()
|
||||
|
||||
#No action argument -> interactive tuning
|
||||
if 'action' not in vars(args):
|
||||
def add_input(help, default):
|
||||
return raw_input(help + "[" + default + "] : ") or default
|
||||
if len(sys.argv)==1:
|
||||
def add_input(help, default):
|
||||
return raw_input(help + "[" + default + "] : ") or default
|
||||
|
||||
self.device = add_input('Device to tune for','0')
|
||||
self.operations = add_input('Operations to tune for','vector-axpy,matrix-axpy,reduction,row-wise-reduction,matrix-product-float32').split(',')
|
||||
self.gemm_layouts = add_input('GEMV Layouts', 'NN,NT,TN,TT') if 'matrix-product' in self.operations else ''
|
||||
self.gemv_layouts = add_input('GEMV Layouts', 'N,T') if 'row-wise-reduction' in self.operations else ''
|
||||
self.json_file = add_input('JSON File', misc_tools.sanitize_string(devices[int(self.device)].name) + '.json')
|
||||
self.method = add_input('Tuning type', 'simple')
|
||||
if self.method == 'simple':
|
||||
self.blas1_size = add_input('BLAS1 size', '10e6')
|
||||
self.blas2_size = add_input('BLAS2 sizes (M,N)', '2560,2560').split(',')
|
||||
self.blas3_size = add_input('BLAS3 sizes (M,N,K)', '1024,1024,1024').split(',')
|
||||
else:
|
||||
self.build_model = True
|
||||
self.sample_size = 30
|
||||
self.viennacl_src_path= add_input('ViennaCL src path', '')
|
||||
else:
|
||||
#Command line arguments
|
||||
parser = argparse.ArgumentParser()
|
||||
subparsers = parser.add_subparsers(dest='action')
|
||||
print_devices_parser = subparsers.add_parser('list-devices', help='List the devices available')
|
||||
tune_parser = subparsers.add_parser('tune', help='Auto-tuning')
|
||||
tune_parser.add_argument("--device", default=0, type=int)
|
||||
tune_parser.add_argument("--operations", default = 'vector-axpy,matrix-axpy,reduction,row-wise-reduction,matrix-product-float32', type=str)
|
||||
tune_parser.add_argument("--gemm-layouts", default='NN,NT,TN,TT', type=str)
|
||||
tune_parser.add_argument("--gemv-layouts", default='N,T', type=str)
|
||||
tune_parser.add_argument("--json-file", default='', type=str)
|
||||
tune_parser.add_argument("--viennacl-src-path", default='', type=str)
|
||||
|
||||
tune_subparsers = tune_parser.add_subparsers(dest='method')
|
||||
simple_parser = tune_subparsers.add_parser('simple', help = 'Tune each operation for unique sizes')
|
||||
|
||||
simple_parser.add_argument("--blas1-size", default = 10e6, type=int)
|
||||
simple_parser.add_argument("--blas2-size", nargs=2, default=[2560,2560], type=int)
|
||||
simple_parser.add_argument("--blas3-size", nargs=3, default=[1536,1536,1536],type=int)
|
||||
|
||||
full_parser = tune_subparsers.add_parser('full', help = 'Tune each operation for randomly chosen sizes')
|
||||
full_parser.add_argument("--build-model", default=True, type=bool)
|
||||
full_parser.add_argument("--sample-size", default=30, type=int)
|
||||
|
||||
args = parser.parse_args()
|
||||
self.__dict__ = args.__dict__.copy()
|
||||
|
||||
|
||||
self.device = add_input('Device to tune for','0')
|
||||
self.exclude_operations = add_input('Operations to exclude','vector-axpy,matrix-axpy,reduction,row-wise-reduction,matrix-product-float64').split(',')
|
||||
self.gemm_layouts = '' if 'matrix-product' in self.exclude_operations else add_input('GEMV Layouts', 'NN,NT,TN,TT')
|
||||
self.gemv_layouts = '' if 'row-wise-reduction' in self.exclude_operations else add_input('GEMV Layouts', 'N,T')
|
||||
self.json_file = add_input('JSON File', misc_tools.sanitize_string(devices[int(self.device)].name) + '.json')
|
||||
self.method = add_input('Tuning type', 'simple')
|
||||
if self.method == 'simple':
|
||||
self.blas1_size = add_input('BLAS1 size', '10e6')
|
||||
self.blas2_size = add_input('BLAS2 sizes (M,N)', '2560,2560').split(',')
|
||||
self.blas3_size = add_input('BLAS3 sizes (M,N,K)', '1024,1024,1024').split(',')
|
||||
self.build_model = True
|
||||
self.sample_size = 30
|
||||
|
||||
#Retypes
|
||||
self.device = devices[int(self.device)]
|
||||
|
@@ -7,6 +7,7 @@ import sys
|
||||
|
||||
import pyopencl as cl
|
||||
import pyviennacl as vcl
|
||||
import numpy as np
|
||||
|
||||
class PhysicalLimitsNV:
|
||||
def __init__(self, dev):
|
||||
@@ -196,15 +197,15 @@ def benchmark(template, statement, device):
|
||||
else:
|
||||
template.execute(statement, True)
|
||||
statement.result.context.finish_all_queues()
|
||||
N = 0
|
||||
current_time = 0
|
||||
timings = []
|
||||
while current_time < 1e-1:
|
||||
time_before = time.time()
|
||||
template.execute(statement,False)
|
||||
statement.result.context.finish_all_queues()
|
||||
current_time = current_time + time.time() - time_before
|
||||
N+=1
|
||||
return current_time/N
|
||||
timings.append(time.time() - time_before)
|
||||
current_time = current_time + timings[-1]
|
||||
return np.median(timings)
|
||||
|
||||
|
||||
def sanitize_string(string, keep_chars = ['_']):
|
||||
|
@@ -13,7 +13,12 @@ def gmean(a, axis=0, dtype=None):
|
||||
else:
|
||||
log_a = np.log(a)
|
||||
return np.exp(log_a.mean(axis=axis))
|
||||
|
||||
|
||||
def nrmse(y_ground, y):
|
||||
N = y.size
|
||||
rmsd = np.sqrt(np.sum((y_ground - y)**2)/N)
|
||||
return rmsd/(np.max(y_ground) - np.min(y_ground))
|
||||
|
||||
def train_model(X, Y, profiles, metric):
|
||||
#Shuffle
|
||||
p = np.random.permutation(X.shape[0])
|
||||
@@ -22,24 +27,19 @@ def train_model(X, Y, profiles, metric):
|
||||
#Normalize
|
||||
Ymax = np.max(Y)
|
||||
Y = Y/Ymax
|
||||
|
||||
#Train the model
|
||||
cut = int(0.75*X.shape[0])
|
||||
clf = ensemble.RandomForestRegressor(10, max_depth=3).fit(X[:cut,:], Y[:cut,:])
|
||||
cut = int(0.9*X.shape[0])
|
||||
nrmses = {}
|
||||
for depth in range(1,10):
|
||||
clf = ensemble.RandomForestRegressor(5, max_depth=4).fit(X[:cut,:], Y[:cut,:])
|
||||
t = np.argmin(clf.predict(X[cut:,:]), axis = 1)
|
||||
y = np.array([Y[cut+i,t[i]] for i in range(t.size)])
|
||||
y_ground = np.min(Y[cut:,:], axis=1)
|
||||
# for i in range(t.size):
|
||||
# print X[cut+i,:], y[i], y_ground[i]
|
||||
nrmses[clf] = nrmse(y_ground, y)
|
||||
print depth, nrmses[clf]
|
||||
|
||||
t = np.argmin(clf.predict(X[cut:,:]), axis = 1)
|
||||
s = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], t)])
|
||||
tt = np.argmin(Y[cut:,:], axis = 1)
|
||||
ss = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], tt)])
|
||||
|
||||
p5 = lambda a: np.percentile(a, 5)
|
||||
p25 = lambda a: np.percentile(a, 25)
|
||||
p50 = lambda a: np.percentile(a, 50)
|
||||
p75 = lambda a: np.percentile(a, 75)
|
||||
p95 = lambda a: np.percentile(a, 95)
|
||||
|
||||
print("Percentile :\t 5 \t 25 \t 50 \t 75 \t 95")
|
||||
print("Testing speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(s), p25(s), p50(s), p75(s), p95(s)))
|
||||
print("Optimal speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(ss), p25(ss), p50(ss), p75(ss), p95(ss)))
|
||||
clf = min(nrmses, key=nrmses.get)
|
||||
|
||||
return clf
|
||||
|
Reference in New Issue
Block a user