From 0ae87eeff450bb7db5f89de08971ac66fef19c81 Mon Sep 17 00:00:00 2001 From: Philippe Tillet Date: Sun, 2 Nov 2014 10:05:14 -0500 Subject: [PATCH] Added viennacl-src-path in the UI --- python/autotune/pysrc/autotune.py | 112 ++++++++++++++-------------- python/autotune/pysrc/misc_tools.py | 9 ++- python/autotune/pysrc/model.py | 36 ++++----- 3 files changed, 81 insertions(+), 76 deletions(-) diff --git a/python/autotune/pysrc/autotune.py b/python/autotune/pysrc/autotune.py index 43934ec68..61543410e 100644 --- a/python/autotune/pysrc/autotune.py +++ b/python/autotune/pysrc/autotune.py @@ -12,11 +12,11 @@ from model import train_model TYPES = { 'vector-axpy': {'template':atd.VectorAxpyTemplate, - 'perf-index':lambda x: 2*x[0]*x[1][0]/x[2]*1e-9, + 'perf-index':lambda x: 3*x[0]*x[1][0]/x[2]*1e-9, 'perf-measure':'GB/s'}, 'matrix-axpy': {'template':atd.MatrixAxpyTemplate, - 'perf-index':lambda x: 2*x[0]*x[1][0]*x[1][1]/x[2]*1e-9, + 'perf-index':lambda x: 3*x[0]*x[1][0]*x[1][1]/x[2]*1e-9, 'perf-measure':'GB/s'}, 'reduction': {'template':atd.ReductionTemplate, @@ -53,7 +53,7 @@ def do_tuning(args): for datatype in [vcl.float32, vcl.float64]: - if any(x in args.exclude_operations for x in [operation, operation + '-' + datatype.__name__]): + if not any(x in args.operations for x in [operation + '-' + datatype.__name__]): continue ctx = cl.Context([device]) @@ -106,10 +106,13 @@ def do_tuning(args): def compute_perf(x, t): return TYPES[operation]['perf-index']([datatype().itemsize, x, t]) profiles_generator = log_space_gen_product(a, b, args.sample_size, dimsample) - profiles = dataset.sample_profiles(execution_handler, profiles_generator) + # profiles = dataset.sample_profiles(execution_handler, profiles_generator) if args.build_model: dataset_generator = log_space_gen_product(a, b, 1000, dimsample) - X, Y, profiles = dataset.sample_dataset(os.path.join(full_operation,datatype.__name__), profiles, execution_handler, dataset_generator) + # X, Y, profiles = dataset.sample_dataset(os.path.join(full_operation,datatype.__name__), profiles, execution_handler, dataset_generator) + profiles = np.loadtxt('data/vector-axpy/float32/profiles.csv') + X = np.loadtxt('data/vector-axpy/float32/X.csv',ndmin=2) + Y = np.loadtxt('data/vector-axpy/float32/Y.csv',ndmin=2) clf = train_model(X, Y, profiles, TYPES[operation]['perf-measure']) D['predictor'] = [{'children_left': e.tree_.children_left.tolist(), 'children_right': e.tree_.children_right.tolist(), @@ -125,9 +128,9 @@ def do_tuning(args): if operation=='vector-axpy': def execution_handler(sizes, fname=os.devnull, parameters=None): x = vcl.Vector(sizes[0], context=ctx, dtype=datatype) - z = vcl.Vector(sizes[0], context=ctx, dtype=datatype) - return execute(device, vcl.Assign(z, x), (), sizes, fname, parameters) - tune(execution_handler, 1e4, 1e7, 1, ()) + y = vcl.Vector(sizes[0], context=ctx, dtype=datatype) + return execute(device, vcl.Assign(y, x + y), (), sizes, fname, parameters) + tune(execution_handler, 1e4, 2e7, 1, ()) #Reduction if operation=='reduction': def execution_handler(sizes, fname=os.devnull, parameters=None): @@ -135,13 +138,13 @@ def do_tuning(args): y = vcl.Vector(sizes[0], context=ctx, dtype=datatype) s = vcl.Scalar(0, context=ctx, dtype=datatype) return execute(device, vcl.Assign(s, vcl.Dot(x,y)), (), sizes, fname, parameters) - tune(execution_handler, 1e4, 1e7, 1, ()) + tune(execution_handler, 1e4, 2e7, 1, ()) #Matrix AXPY if operation=='matrix-axpy': def execution_handler(sizes, fname=os.devnull, parameters=None): A = vcl.Matrix(sizes, context=ctx, dtype=datatype, layout=vcl.COL_MAJOR) C = vcl.Matrix(sizes, context=ctx, dtype=datatype, layout=vcl.COL_MAJOR) - return execute(device, vcl.Assign(C,A), (), sizes, fname, parameters) + return execute(device, vcl.Assign(C,A + C), (), sizes, fname, parameters) tune(execution_handler, 100, 4000, 2, ()) #Row-wise reduction if operation=='row-wise-reduction': @@ -178,52 +181,53 @@ class ArgumentsHandler: def __init__(self): - #Command line arguments - parent_parser = argparse.ArgumentParser('parent', add_help=False) - parent_parser.add_argument('--version', action='version', version='%(prog)s 2.0') - - parser = argparse.ArgumentParser(parents=[parent_parser]) - subparsers = parser.add_subparsers(dest='action') - print_devices_parser = subparsers.add_parser('list-devices', help='List the devices available', parents=[parent_parser]) - tune_parser = subparsers.add_parser('tune', help='Auto-tuning', parents=[parent_parser]) - tune_parser.add_argument("--device", default=0, type=int) - tune_parser.add_argument("--exclude-operations", default = '', type=str) - tune_parser.add_argument("--gemm-layouts", default='NN,NT,TN,TT', type=str) - tune_parser.add_argument("--gemv-layouts", default='N,T', type=str) - tune_parser.add_argument("--json-file", default='', type=str) - tune_parser.add_argument("--viennacl-src-path", default='', type=str) - - tune_subparsers = tune_parser.add_subparsers(dest='method') - simple_parser = tune_subparsers.add_parser('simple', help = 'Tune each operation for unique sizes') - - simple_parser.add_argument("--blas1-size", default = 10e6, type=int) - simple_parser.add_argument("--blas2-size", nargs=2, default=[2560,2560], type=int) - simple_parser.add_argument("--blas3-size", nargs=3, default=[1536,1536,1536],type=int) - - full_parser = tune_subparsers.add_parser('full', help = 'Tune each operation for randomly chosen sizes') - full_parser.add_argument("--build-model", default=False, type=bool) - full_parser.add_argument("--sample-size", default=30, type=int) - - args = parent_parser.parse_args() - self.__dict__ = args.__dict__.copy() - #No action argument -> interactive tuning - if 'action' not in vars(args): - def add_input(help, default): - return raw_input(help + "[" + default + "] : ") or default + if len(sys.argv)==1: + def add_input(help, default): + return raw_input(help + "[" + default + "] : ") or default + + self.device = add_input('Device to tune for','0') + self.operations = add_input('Operations to tune for','vector-axpy,matrix-axpy,reduction,row-wise-reduction,matrix-product-float32').split(',') + self.gemm_layouts = add_input('GEMV Layouts', 'NN,NT,TN,TT') if 'matrix-product' in self.operations else '' + self.gemv_layouts = add_input('GEMV Layouts', 'N,T') if 'row-wise-reduction' in self.operations else '' + self.json_file = add_input('JSON File', misc_tools.sanitize_string(devices[int(self.device)].name) + '.json') + self.method = add_input('Tuning type', 'simple') + if self.method == 'simple': + self.blas1_size = add_input('BLAS1 size', '10e6') + self.blas2_size = add_input('BLAS2 sizes (M,N)', '2560,2560').split(',') + self.blas3_size = add_input('BLAS3 sizes (M,N,K)', '1024,1024,1024').split(',') + else: + self.build_model = True + self.sample_size = 30 + self.viennacl_src_path= add_input('ViennaCL src path', '') + else: + #Command line arguments + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(dest='action') + print_devices_parser = subparsers.add_parser('list-devices', help='List the devices available') + tune_parser = subparsers.add_parser('tune', help='Auto-tuning') + tune_parser.add_argument("--device", default=0, type=int) + tune_parser.add_argument("--operations", default = 'vector-axpy,matrix-axpy,reduction,row-wise-reduction,matrix-product-float32', type=str) + tune_parser.add_argument("--gemm-layouts", default='NN,NT,TN,TT', type=str) + tune_parser.add_argument("--gemv-layouts", default='N,T', type=str) + tune_parser.add_argument("--json-file", default='', type=str) + tune_parser.add_argument("--viennacl-src-path", default='', type=str) + + tune_subparsers = tune_parser.add_subparsers(dest='method') + simple_parser = tune_subparsers.add_parser('simple', help = 'Tune each operation for unique sizes') + + simple_parser.add_argument("--blas1-size", default = 10e6, type=int) + simple_parser.add_argument("--blas2-size", nargs=2, default=[2560,2560], type=int) + simple_parser.add_argument("--blas3-size", nargs=3, default=[1536,1536,1536],type=int) + + full_parser = tune_subparsers.add_parser('full', help = 'Tune each operation for randomly chosen sizes') + full_parser.add_argument("--build-model", default=True, type=bool) + full_parser.add_argument("--sample-size", default=30, type=int) + + args = parser.parse_args() + self.__dict__ = args.__dict__.copy() + - self.device = add_input('Device to tune for','0') - self.exclude_operations = add_input('Operations to exclude','vector-axpy,matrix-axpy,reduction,row-wise-reduction,matrix-product-float64').split(',') - self.gemm_layouts = '' if 'matrix-product' in self.exclude_operations else add_input('GEMV Layouts', 'NN,NT,TN,TT') - self.gemv_layouts = '' if 'row-wise-reduction' in self.exclude_operations else add_input('GEMV Layouts', 'N,T') - self.json_file = add_input('JSON File', misc_tools.sanitize_string(devices[int(self.device)].name) + '.json') - self.method = add_input('Tuning type', 'simple') - if self.method == 'simple': - self.blas1_size = add_input('BLAS1 size', '10e6') - self.blas2_size = add_input('BLAS2 sizes (M,N)', '2560,2560').split(',') - self.blas3_size = add_input('BLAS3 sizes (M,N,K)', '1024,1024,1024').split(',') - self.build_model = True - self.sample_size = 30 #Retypes self.device = devices[int(self.device)] diff --git a/python/autotune/pysrc/misc_tools.py b/python/autotune/pysrc/misc_tools.py index 001b88ba7..ebaee6b2f 100644 --- a/python/autotune/pysrc/misc_tools.py +++ b/python/autotune/pysrc/misc_tools.py @@ -7,6 +7,7 @@ import sys import pyopencl as cl import pyviennacl as vcl +import numpy as np class PhysicalLimitsNV: def __init__(self, dev): @@ -196,15 +197,15 @@ def benchmark(template, statement, device): else: template.execute(statement, True) statement.result.context.finish_all_queues() - N = 0 current_time = 0 + timings = [] while current_time < 1e-1: time_before = time.time() template.execute(statement,False) statement.result.context.finish_all_queues() - current_time = current_time + time.time() - time_before - N+=1 - return current_time/N + timings.append(time.time() - time_before) + current_time = current_time + timings[-1] + return np.median(timings) def sanitize_string(string, keep_chars = ['_']): diff --git a/python/autotune/pysrc/model.py b/python/autotune/pysrc/model.py index 11516ae9a..ce0e9665a 100644 --- a/python/autotune/pysrc/model.py +++ b/python/autotune/pysrc/model.py @@ -13,7 +13,12 @@ def gmean(a, axis=0, dtype=None): else: log_a = np.log(a) return np.exp(log_a.mean(axis=axis)) - + +def nrmse(y_ground, y): + N = y.size + rmsd = np.sqrt(np.sum((y_ground - y)**2)/N) + return rmsd/(np.max(y_ground) - np.min(y_ground)) + def train_model(X, Y, profiles, metric): #Shuffle p = np.random.permutation(X.shape[0]) @@ -22,24 +27,19 @@ def train_model(X, Y, profiles, metric): #Normalize Ymax = np.max(Y) Y = Y/Ymax - #Train the model - cut = int(0.75*X.shape[0]) - clf = ensemble.RandomForestRegressor(10, max_depth=3).fit(X[:cut,:], Y[:cut,:]) + cut = int(0.9*X.shape[0]) + nrmses = {} + for depth in range(1,10): + clf = ensemble.RandomForestRegressor(5, max_depth=4).fit(X[:cut,:], Y[:cut,:]) + t = np.argmin(clf.predict(X[cut:,:]), axis = 1) + y = np.array([Y[cut+i,t[i]] for i in range(t.size)]) + y_ground = np.min(Y[cut:,:], axis=1) + # for i in range(t.size): + # print X[cut+i,:], y[i], y_ground[i] + nrmses[clf] = nrmse(y_ground, y) + print depth, nrmses[clf] - t = np.argmin(clf.predict(X[cut:,:]), axis = 1) - s = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], t)]) - tt = np.argmin(Y[cut:,:], axis = 1) - ss = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], tt)]) - - p5 = lambda a: np.percentile(a, 5) - p25 = lambda a: np.percentile(a, 25) - p50 = lambda a: np.percentile(a, 50) - p75 = lambda a: np.percentile(a, 75) - p95 = lambda a: np.percentile(a, 95) - - print("Percentile :\t 5 \t 25 \t 50 \t 75 \t 95") - print("Testing speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(s), p25(s), p50(s), p75(s), p95(s))) - print("Optimal speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(ss), p25(ss), p50(ss), p75(ss), p95(ss))) + clf = min(nrmses, key=nrmses.get) return clf