From 9a5627e20fe8adfbb39ae98c7be3a56c84a858c5 Mon Sep 17 00:00:00 2001
From: Philippe Tillet
Date: Tue, 28 Oct 2014 01:10:14 -0400
Subject: [PATCH] Rework the autotuner command line: 'simple' and 'full'
 tuning sub-commands

* Replace the 'unique' method by a 'simple' sub-command whose per-BLAS-level
  sizes (--blas1-size, --blas2-size, --blas3-size) supersede the flat
  --sizes list.
* Add a 'full' sub-command that tunes over randomly sampled sizes
  (--sample-size) and optionally trains an input-dependent model
  (--build-model).
* Hoist --viennacl-src-path up to the 'tune' command so both methods can
  update the ViennaCL headers.
* Split dataset generation into dataset.sample_profiles() and
  dataset.sample_dataset(), replacing the commented-out generate_dataset().
* Drop the dead, commented-out exhaustive search from optimize.py.
---
 python/autotune/pysrc/autotune.py | 58 +++++++++++---------
 python/autotune/pysrc/dataset.py  | 91 ++++++++++++++++---------------
 python/autotune/pysrc/optimize.py | 39 -------
 3 files changed, 77 insertions(+), 111 deletions(-)

diff --git a/python/autotune/pysrc/autotune.py b/python/autotune/pysrc/autotune.py
index 8fbf63a36..b8cd97715 100644
--- a/python/autotune/pysrc/autotune.py
+++ b/python/autotune/pysrc/autotune.py
@@ -1,8 +1,7 @@
 from __future__ import division
 import argparse, itertools, os, sys, json
-import misc_tools, optimize
-
+import misc_tools, optimize, dataset
 import pyopencl as cl
 import pyviennacl as vcl
 import pyatidlas as atd
@@ -10,7 +9,6 @@
 import numpy as np
 from configobj import ConfigObj
 from numpy import random
-from dataset import generate_dataset
 from model import train_model
@@ -42,10 +40,10 @@ def do_tuning(args, devices):
     def map_to_list(T, x):
         return list(map(T, x if isinstance(x, list) else [x]))

-    if(args.method=='unique'):
-        default_tuning_sizes = {'vector-axpy': tuple(args.sizes[:1]), 'reduction': tuple(args.sizes[:1]),
-                                'matrix-axpy' : tuple(args.sizes[1:3]), 'row-wise-reduction' : tuple(args.sizes[1:3]),
-                                'matrix-product': tuple(args.sizes[3:])}
+    if(args.method=='simple'):
+        default_tuning_sizes = {'vector-axpy': [args.blas1_size], 'reduction': [args.blas1_size],
+                                'matrix-axpy' : args.blas2_size, 'row-wise-reduction' : args.blas2_size,
+                                'matrix-product': args.blas3_size}

     for operation in ['vector-axpy', 'matrix-axpy', 'reduction', 'row-wise-reduction', 'matrix-product']:
         #Iterate through the datatypes
@@ -72,7 +70,7 @@ def do_tuning(args, devices):
                             lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]),
                             TYPES[operation]['perf-measure'], archive)
             #Helper for tuning
-            def tune(execution_handler, nTuning, nDataPoints, draw, additional_parameters):
+            def tune(execution_handler, n_datapoints, sampler, additional_parameters):
                 #Update JSON
                 full_operation = operation + ''.join(additional_parameters)
                 if full_operation not in json_out:
@@ -80,20 +78,22 @@ def do_tuning(args, devices):
                     json_out[full_operation][datatype.__name__] = {}
                 D = json_out[full_operation][datatype.__name__]

-                if args.method == 'unique':
+                if args.method == 'simple':
                     profiles = [execution_handler(map(int,default_tuning_sizes[operation]))]
-                    if args.viennacl_src_path:
-                        misc_tools.update_viennacl_headers(args.viennacl_src_path,device,datatype,operation,additional_parameters,profiles[0])
                 else:
                     def compute_perf(x, t):
                         return TYPES[operation]['perf-index']([datatype().itemsize, x, t])
-                    X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler, nTuning, nDataPoints, draw)
-                    clf = train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
-                    D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
-                                       'children_right': e.tree_.children_right.tolist(),
-                                       'threshold': e.tree_.threshold.astype('float32').tolist(),
-                                       'feature': e.tree_.feature.astype('float32').tolist(),
-                                       'value': e.tree_.value[:,:,0].astype('float32').tolist()} for e in clf.estimators_]
+                    profiles = dataset.sample_profiles(execution_handler, args.sample_size, sampler)
+                    if args.build_model:
+                        X, Y = dataset.sample_dataset(os.path.join(full_operation,datatype.__name__), profiles, execution_handler, n_datapoints, sampler)
+                        clf = train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
+                        D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
+                                           'children_right': e.tree_.children_right.tolist(),
+                                           'threshold': e.tree_.threshold.astype('float32').tolist(),
+                                           'feature': e.tree_.feature.astype('float32').tolist(),
+                                           'value': e.tree_.value[:,:,0].astype('float32').tolist()} for e in clf.estimators_]
+                if args.viennacl_src_path:
+                    misc_tools.update_viennacl_headers(args.viennacl_src_path,device,datatype,operation,additional_parameters,profiles[0])
                 D['profiles'] = [ prof.astype('int').tolist() for prof in profiles]
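For reference, each entry of D['predictor'] above serializes one tree of the
trained ensemble using scikit-learn's flat node layout: node i is a leaf when
children_left[i] == -1, and otherwise tests x[feature[i]] <= threshold[i].
A minimal evaluator for one serialized tree could look like the sketch below
(predict_tree is a hypothetical helper, not part of this patch):

    def predict_tree(tree, x):
        #tree: one dict from D['predictor']; x: the size vector of an operation
        i = 0
        while tree['children_left'][i] != -1:   #-1 marks a leaf in sklearn's layout
            if x[int(tree['feature'][i])] <= tree['threshold'][i]:
                i = int(tree['children_left'][i])
            else:
                i = int(tree['children_right'][i])
        return tree['value'][i]   #per-profile scores stored at the leaf

Averaging the leaf values returned by predict_tree over all serialized trees
approximates the ensemble's prediction of the best profile for a given size.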
@@ -104,7 +104,7 @@ def do_tuning(args, devices):
                     y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
                     z = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
                     return execute(device, vcl.Assign(z, vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y))), (), sizes, fname, parameters)
-                tune(execution_handler, 30, 1000, lambda : 64*np.random.randint(low=10, high=100000, size=1), ())
+                tune(execution_handler, 1000, lambda : 64*np.random.randint(low=10, high=100000, size=1), ())
             #Reduction
             if operation=='reduction':
                 def execution_handler(sizes, fname=os.devnull, parameters=None):
@@ -112,7 +112,7 @@ def do_tuning(args, devices):
                     y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
                     s = vcl.Scalar(0, context=ctx, dtype=datatype)
                     return execute(device, vcl.Assign(s, vcl.Dot(x,y)), (), sizes, fname, parameters)
-                tune(execution_handler, 30, 1000, lambda : 64*np.random.randint(low=10, high=100000, size=1), ())
+                tune(execution_handler, 1000, lambda : 64*np.random.randint(low=10, high=100000, size=1), ())
             #Matrix AXPY
             if operation=='matrix-axpy':
                 def execution_handler(sizes, fname=os.devnull, parameters=None):
@@ -120,7 +120,7 @@ def do_tuning(args, devices):
                     B = vcl.Matrix(sizes, context=ctx, dtype=datatype)
                     C = vcl.Matrix(sizes, context=ctx, dtype=datatype)
                     return execute(device, vcl.Assign(C,A+B), (), sizes, fname, parameters)
-                tune(execution_handler, 30, 1000, lambda : 64*np.random.randint(low=5, high=100, size=2), ())
+                tune(execution_handler, 1000, lambda : 64*np.random.randint(low=5, high=100, size=2), ())
             #Row-wise reduction
             if operation=='row-wise-reduction':
                 layouts = ['N', 'T']
@@ -131,7 +131,7 @@ def do_tuning(args, devices):
                     y = vcl.Vector(sizes[0] if A_trans=='N' else sizes[1], context=ctx, dtype=datatype)
                     LHS = A if A_trans=='N' else A.T
                     return execute(device, vcl.Assign(y, LHS*x), (), sizes, fname, parameters)
-                tune(execution_handler, 30, 1000, lambda : 64*np.random.randint(low=5, high=100, size=2), (A_trans,))
+                tune(execution_handler, 1000, lambda : 64*np.random.randint(low=5, high=100, size=2), (A_trans,))
             #Matrix Product
             if operation=='matrix-product':
                 layouts = ['NN', 'NT', 'TN', 'TT']
@@ -147,7 +147,7 @@ def do_tuning(args, devices):
                     beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
                     C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
                     return execute(device, vcl.Assign(C,LHS*RHS*alpha + C*beta),(A_trans, B_trans), sizes, fname, parameters)
-                tune(execution_handler, 30, 1000, lambda : 64*np.random.randint(low=1, high=40, size=3),(layout[0], layout[1]))
+                tune(execution_handler, 1000, lambda : 64*np.random.randint(low=1, high=40, size=3),(layout[0], layout[1]))

     dname = misc_tools.sanitize_string(device.name)
     json_out["version"] = "1.0"
@@ -161,14 +161,18 @@ if __name__ == "__main__":
     print_devices_parser = subparsers.add_parser('list-devices', help='list the devices available')
     tune_parser = subparsers.add_parser('tune', help='tune using a specific configuration file')
     tune_parser.add_argument("--device", default=0, required=False, type=str)
+    tune_parser.add_argument("--viennacl-src-path", default='', type=str)
     tune_subparsers = tune_parser.add_subparsers(dest='method')
-    big_sizes_parser = tune_subparsers.add_parser('unique', help = 'Tune each operation for unique sizes')
-    big_sizes_parser.add_argument("--sizes", nargs='+', default=[10e6,2560,2560,1536,1536,1536], required=False, type=int, help = '6 = 1 + 2 + 3 sizes for respectively BLAS1, BLAS2, BLAS3')
-    big_sizes_parser.add_argument("--viennacl-src-path", default='', required=False, type=str)
+    simple_parser = tune_subparsers.add_parser('simple', help = 'Tune each operation for a fixed, user-given size')
-    model_parser = tune_subparsers.add_parser('build-model', help = 'Build an input-dependent model')
+    simple_parser.add_argument("--blas1-size", default=int(10e6), type=int)
+    simple_parser.add_argument("--blas2-size", nargs=2, default=[2560,2560], type=int)
+    simple_parser.add_argument("--blas3-size", nargs=3, default=[1536,1536,1536], type=int)
+    full_parser = tune_subparsers.add_parser('full', help = 'Tune each operation for randomly chosen sizes')
+    full_parser.add_argument("--build-model", action='store_true')
+    full_parser.add_argument("--sample-size", default=30, type=int)

     args = parser.parse_args()
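The resulting sub-command surface can be checked without a GPU by handing
parse_args() an explicit argument list. The snippet below mirrors only the
options defined above and is illustrative, not part of the patch:

    import argparse

    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='action')
    tune = subparsers.add_parser('tune')
    tune.add_argument("--device", default=0, type=str)
    tune.add_argument("--viennacl-src-path", default='', type=str)
    methods = tune.add_subparsers(dest='method')
    simple = methods.add_parser('simple')
    simple.add_argument("--blas1-size", default=int(10e6), type=int)
    simple.add_argument("--blas2-size", nargs=2, default=[2560,2560], type=int)
    simple.add_argument("--blas3-size", nargs=3, default=[1536,1536,1536], type=int)
    full = methods.add_parser('full')
    full.add_argument("--build-model", action='store_true')
    full.add_argument("--sample-size", default=30, type=int)

    #Equivalent to: python autotune.py tune full --sample-size 50 --build-model
    args = parser.parse_args(['tune', 'full', '--sample-size', '50', '--build-model'])
    assert args.method == 'full' and args.sample_size == 50 and args.build_model

Note that --build-model uses action='store_true' rather than type=bool:
argparse's type callable receives a string, and bool('False') is True, so
type=bool would silently enable the model for any explicit value.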
diff --git a/python/autotune/pysrc/dataset.py b/python/autotune/pysrc/dataset.py
index c06476217..f9c736f07 100644
--- a/python/autotune/pysrc/dataset.py
+++ b/python/autotune/pysrc/dataset.py
@@ -4,64 +4,65 @@
 import re
 import random
 import numpy as np

-def resample(X, draw):
+def resample(X, sampler):
     Xtuples = [tuple(x) for x in X]
     r = random.random()
     while(True):
-        x = draw()
+        x = sampler()
         if tuple(x) not in Xtuples:
             break
     return x.astype(int)

-def generate_dataset(TemplateType, execution_handler, nTuning, nDataPoints, draw):
+def sample_profiles(execution_handler, nTuning, sampler):
+    #Tune at nTuning random sizes and collect the distinct winning profiles
+    print "Sampling profiles..."
+    nDim = sampler().size
+    X = np.empty((nTuning, nDim))
+    t = np.empty(nTuning)
+    profiles = []
+    for i in range(nTuning):
+        x = resample(X, sampler)
+        y = execution_handler(x)
+        if y not in profiles:
+            profiles.append(y)
+        idx = profiles.index(y)
+        X[i,:] = x
+        t[i] = idx

-    # print "Getting some good profiles..."
-    # nDim = draw().size
-    # X = np.empty((nTuning, nDim))
-    # t = np.empty(nTuning)
-    # profiles = []
-    # for i in range(nTuning):
-    #     x = resample(X, draw)
-    #     y = execution_handler(x)
-    #     if y not in profiles:
-    #         profiles.append(y)
-    #     idx = profiles.index(y)
-    #     X[i,:] = x
-    #     t[i] = idx
-    #
-    # print "Generating the dataset..."
-    # Y = np.empty((nDataPoints, len(profiles)))
-    # X = np.empty((nDataPoints, nDim))
-    # t = []
-    #
-    # for i in range(nDataPoints):
-    #     x = resample(X, draw)
-    #     for j,y in enumerate(profiles):
-    #         T = execution_handler(x, os.devnull, y)
-    #         Y[i,j] = T
-    #     idx = np.argmax(Y[i,:])
-    #     X[i,:] = x
-    #     t = np.argmax(Y[:i+1,], axis=1)
-    #     if i%10==0:
-    #         sys.stdout.write('%d data points generated\r'%i)
-    #         sys.stdout.flush()
+    #Move the profile that wins at the largest sampled size to the front
+    idx = int(t[np.argmax(np.linalg.norm(X, axis=1))])
+    profiles = np.array([profiles[idx]] + [x for i,x in enumerate(profiles) if i!=idx])
+    return profiles

-    template_name = TemplateType.__name__
-    dir = os.path.join("data", template_name)
-    if not os.path.exists(dir):
-        os.makedirs(dir)
+def sample_dataset(prefix_name, profiles, execution_handler, nDataPoints, sampler):
+    #Time every profile at nDataPoints random sizes to build a training set

-    # np.savetxt(os.path.join(dir,"profiles.csv"), profiles)
-    # np.savetxt(os.path.join(dir,"X.csv"), X)
-    # np.savetxt(os.path.join(dir,"Y.csv"), Y)
+    print "Generating the dataset..."
+    Y = np.empty((nDataPoints, len(profiles)))
+    X = np.empty((nDataPoints, sampler().size))   #one column per size dimension
+    t = []

-    profiles = np.loadtxt(os.path.join(dir, "profiles.csv"))
-    X = np.loadtxt(os.path.join(dir, "X.csv"),ndmin=2)
-    Y = np.loadtxt(os.path.join(dir, "Y.csv"),ndmin=2)
+    for i in range(nDataPoints):
+        x = resample(X, sampler)
+        for j,y in enumerate(profiles):
+            T = execution_handler(x, os.devnull, y)
+            Y[i,j] = T
+        idx = np.argmax(Y[i,:])
+        X[i,:] = x
+        t = np.argmax(Y[:i+1,], axis=1)
+        if i%10==0:
+            sys.stdout.write('%d data points generated\r'%i)
+            sys.stdout.flush()

-    #idx = np.argsort(np.bincount(np.argmin(Y, axis=1)))
     idx = np.argsort(Y[np.argmax(X),:])
     Y = Y[:, idx]
     profiles = profiles[idx]
+
+    dir = os.path.join("data", prefix_name)
+    if not os.path.exists(dir):
+        os.makedirs(dir)
+    np.savetxt(os.path.join(dir,"X.csv"), X)
+    np.savetxt(os.path.join(dir,"Y.csv"), Y)
+    np.savetxt(os.path.join(dir,"profiles.csv"), profiles)
+    X = np.loadtxt(os.path.join(dir, "X.csv"),ndmin=2)
+    Y = np.loadtxt(os.path.join(dir, "Y.csv"),ndmin=2)
+    profiles = np.loadtxt(os.path.join(dir, "profiles.csv"))

-    return X, Y, profiles
+    return X, Y
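Together these two functions implement the pipeline behind the 'full' method:
sample_profiles() gathers the distinct winning profiles over random problem
sizes, and sample_dataset() then times every one of those profiles at fresh
random sizes to build the (X, Y) training set consumed by train_model(). The
toy run below sketches the contract, with a fake execution_handler standing
in for the real OpenCL benchmark; every value in it is illustrative:

    import numpy as np
    import dataset

    #Fake benchmark: called with sizes only, it returns the winning profile
    #for those sizes; called with (sizes, fname, profile), a pseudo-time.
    def fake_execution_handler(sizes, fname=None, profile=None):
        if profile is None:
            return (1, 64) if sizes[0] < 2048 else (4, 128)
        return float(sizes[0]) / (profile[0]*profile[1])

    sampler = lambda: 64*np.random.randint(low=1, high=100, size=1)
    profiles = dataset.sample_profiles(fake_execution_handler, 10, sampler)
    X, Y = dataset.sample_dataset('toy', profiles, fake_execution_handler, 20, sampler)
    #X has one row per sampled size and Y one column per profile; both are
    #also written under data/toy/ as CSV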
diff --git a/python/autotune/pysrc/optimize.py b/python/autotune/pysrc/optimize.py
index 8df8aa438..134fe0d8c 100644
--- a/python/autotune/pysrc/optimize.py
+++ b/python/autotune/pysrc/optimize.py
@@ -4,45 +4,6 @@
 import numpy as np

 from genetic import GeneticOperators

-#~ def parameter_space(operation):
-    #~ simd = [1, 2, 4, 8]
-    #~ pow2_1D = [2**k for k in range(12)]
-    #~ pow2_2D = [2**i for i in range(8)]
-    #~ pow2_2D_unrolled = [2**i for i in range(8)]
-    #~ FetchingPolicy = vcl.atidlas.FetchingPolicy
-    #~ fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED]
-    #~ if operation == 'vector-axpy': return [simd, pow2_1D, pow2_1D, fetch]
-    #~ if operation == 'reduction': return [simd, pow2_1D, pow2_1D, fetch]
-    #~ if operation == 'matrix-axpy': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D, fetch]
-    #~ if operation == 'row-wise-reduction': return [simd, pow2_2D, pow2_2D, pow2_1D, fetch]
-    #~ if operation == 'matrix-product': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D_unrolled, pow2_2D_unrolled, pow2_2D_unrolled, fetch, fetch, [0] + pow2_2D, [0] + pow2_2D]
-    #~
-
-#~ def exhaustive(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out):
-    #~ device = context.devices[0]
-    #~ nvalid = 0
-    #~ current = 0
-    #~ minT = float('inf')
-    #~ for individual in itertools.product(*all_parameters):
-        #~ template = build_template(TemplateType.Parameters(*individual))
-        #~ if not tools.skip(template, statement, device):
-            #~ nvalid = nvalid + 1
-    #~ for individual in itertools.product(*all_parameters):
-        #~ template = build_template(TemplateType.Parameters(*individual))
-        #~ try:
-            #~ T = tools.benchmark(template,statement,device)
-            #~ current = current + 1
-            #~ if T < minT:
-                #~ minT = T
-                #~ best = individual
-            #~ sys.stdout.write('%d / %d , Best is %d %s for %s\r'%(current, nvalid, compute_perf(minT), perf_metric, best))
-            #~ sys.stdout.flush()
-        #~ except:
-            #~ pass
-    #~ sys.stdout.write('\n')
-    #~ sys.stdout.flush()
-    #~
-
 def genetic(statement, device, TemplateType, build_template, compute_perf, perf_metric, out):
     GA = GeneticOperators(device, statement, TemplateType, build_template, out)
     return GA.optimize(maxtime='2m30s', maxgen=1000, compute_perf=compute_perf, perf_metric=perf_metric)
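The genetic() entry point kept above receives its objective through
compute_perf; in autotune.py this is bound to a lambda evaluating
TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), whose real
definitions live outside this patch. For the matrix product the index is
presumably a GFLOP/s count, and for the bandwidth-bound kernels a GB/s count,
along the lines of this sketch (both functions hypothetical):

    #Hypothetical perf-index functions, assuming t is a time in seconds.
    def matrix_product_perf_index(args):
        itemsize, (M, N, K), t = args
        return 2.0*M*N*K/t*1e-9        #GFLOP/s for C = alpha*A*B + beta*C

    def vector_axpy_perf_index(args):
        itemsize, (N,), t = args
        return 3.0*N*itemsize/t*1e-9   #GB/s: read x and y, write z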