Added viennacl-src-path in the UI

This commit is contained in:
Philippe Tillet
2014-11-02 10:05:14 -05:00
parent 45d602bfc8
commit 0ae87eeff4
3 changed files with 81 additions and 76 deletions

View File

@@ -12,11 +12,11 @@ from model import train_model
TYPES = { 'vector-axpy': {'template':atd.VectorAxpyTemplate,
'perf-index':lambda x: 2*x[0]*x[1][0]/x[2]*1e-9,
'perf-index':lambda x: 3*x[0]*x[1][0]/x[2]*1e-9,
'perf-measure':'GB/s'},
'matrix-axpy': {'template':atd.MatrixAxpyTemplate,
'perf-index':lambda x: 2*x[0]*x[1][0]*x[1][1]/x[2]*1e-9,
'perf-index':lambda x: 3*x[0]*x[1][0]*x[1][1]/x[2]*1e-9,
'perf-measure':'GB/s'},
'reduction': {'template':atd.ReductionTemplate,
@@ -53,7 +53,7 @@ def do_tuning(args):
for datatype in [vcl.float32, vcl.float64]:
if any(x in args.exclude_operations for x in [operation, operation + '-' + datatype.__name__]):
if not any(x in args.operations for x in [operation + '-' + datatype.__name__]):
continue
ctx = cl.Context([device])
@@ -106,10 +106,13 @@ def do_tuning(args):
def compute_perf(x, t):
return TYPES[operation]['perf-index']([datatype().itemsize, x, t])
profiles_generator = log_space_gen_product(a, b, args.sample_size, dimsample)
profiles = dataset.sample_profiles(execution_handler, profiles_generator)
# profiles = dataset.sample_profiles(execution_handler, profiles_generator)
if args.build_model:
dataset_generator = log_space_gen_product(a, b, 1000, dimsample)
X, Y, profiles = dataset.sample_dataset(os.path.join(full_operation,datatype.__name__), profiles, execution_handler, dataset_generator)
# X, Y, profiles = dataset.sample_dataset(os.path.join(full_operation,datatype.__name__), profiles, execution_handler, dataset_generator)
profiles = np.loadtxt('data/vector-axpy/float32/profiles.csv')
X = np.loadtxt('data/vector-axpy/float32/X.csv',ndmin=2)
Y = np.loadtxt('data/vector-axpy/float32/Y.csv',ndmin=2)
clf = train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
'children_right': e.tree_.children_right.tolist(),
@@ -125,9 +128,9 @@ def do_tuning(args):
if operation=='vector-axpy':
def execution_handler(sizes, fname=os.devnull, parameters=None):
x = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
z = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
return execute(device, vcl.Assign(z, x), (), sizes, fname, parameters)
tune(execution_handler, 1e4, 1e7, 1, ())
y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
return execute(device, vcl.Assign(y, x + y), (), sizes, fname, parameters)
tune(execution_handler, 1e4, 2e7, 1, ())
#Reduction
if operation=='reduction':
def execution_handler(sizes, fname=os.devnull, parameters=None):
@@ -135,13 +138,13 @@ def do_tuning(args):
y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
s = vcl.Scalar(0, context=ctx, dtype=datatype)
return execute(device, vcl.Assign(s, vcl.Dot(x,y)), (), sizes, fname, parameters)
tune(execution_handler, 1e4, 1e7, 1, ())
tune(execution_handler, 1e4, 2e7, 1, ())
#Matrix AXPY
if operation=='matrix-axpy':
def execution_handler(sizes, fname=os.devnull, parameters=None):
A = vcl.Matrix(sizes, context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
C = vcl.Matrix(sizes, context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
return execute(device, vcl.Assign(C,A), (), sizes, fname, parameters)
return execute(device, vcl.Assign(C,A + C), (), sizes, fname, parameters)
tune(execution_handler, 100, 4000, 2, ())
#Row-wise reduction
if operation=='row-wise-reduction':
@@ -178,52 +181,53 @@ class ArgumentsHandler:
def __init__(self):
#Command line arguments
parent_parser = argparse.ArgumentParser('parent', add_help=False)
parent_parser.add_argument('--version', action='version', version='%(prog)s 2.0')
parser = argparse.ArgumentParser(parents=[parent_parser])
subparsers = parser.add_subparsers(dest='action')
print_devices_parser = subparsers.add_parser('list-devices', help='List the devices available', parents=[parent_parser])
tune_parser = subparsers.add_parser('tune', help='Auto-tuning', parents=[parent_parser])
tune_parser.add_argument("--device", default=0, type=int)
tune_parser.add_argument("--exclude-operations", default = '', type=str)
tune_parser.add_argument("--gemm-layouts", default='NN,NT,TN,TT', type=str)
tune_parser.add_argument("--gemv-layouts", default='N,T', type=str)
tune_parser.add_argument("--json-file", default='', type=str)
tune_parser.add_argument("--viennacl-src-path", default='', type=str)
tune_subparsers = tune_parser.add_subparsers(dest='method')
simple_parser = tune_subparsers.add_parser('simple', help = 'Tune each operation for unique sizes')
simple_parser.add_argument("--blas1-size", default = 10e6, type=int)
simple_parser.add_argument("--blas2-size", nargs=2, default=[2560,2560], type=int)
simple_parser.add_argument("--blas3-size", nargs=3, default=[1536,1536,1536],type=int)
full_parser = tune_subparsers.add_parser('full', help = 'Tune each operation for randomly chosen sizes')
full_parser.add_argument("--build-model", default=False, type=bool)
full_parser.add_argument("--sample-size", default=30, type=int)
args = parent_parser.parse_args()
self.__dict__ = args.__dict__.copy()
#No action argument -> interactive tuning
if 'action' not in vars(args):
def add_input(help, default):
return raw_input(help + "[" + default + "] : ") or default
if len(sys.argv)==1:
def add_input(help, default):
return raw_input(help + "[" + default + "] : ") or default
self.device = add_input('Device to tune for','0')
self.operations = add_input('Operations to tune for','vector-axpy,matrix-axpy,reduction,row-wise-reduction,matrix-product-float32').split(',')
self.gemm_layouts = add_input('GEMV Layouts', 'NN,NT,TN,TT') if 'matrix-product' in self.operations else ''
self.gemv_layouts = add_input('GEMV Layouts', 'N,T') if 'row-wise-reduction' in self.operations else ''
self.json_file = add_input('JSON File', misc_tools.sanitize_string(devices[int(self.device)].name) + '.json')
self.method = add_input('Tuning type', 'simple')
if self.method == 'simple':
self.blas1_size = add_input('BLAS1 size', '10e6')
self.blas2_size = add_input('BLAS2 sizes (M,N)', '2560,2560').split(',')
self.blas3_size = add_input('BLAS3 sizes (M,N,K)', '1024,1024,1024').split(',')
else:
self.build_model = True
self.sample_size = 30
self.viennacl_src_path= add_input('ViennaCL src path', '')
else:
#Command line arguments
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest='action')
print_devices_parser = subparsers.add_parser('list-devices', help='List the devices available')
tune_parser = subparsers.add_parser('tune', help='Auto-tuning')
tune_parser.add_argument("--device", default=0, type=int)
tune_parser.add_argument("--operations", default = 'vector-axpy,matrix-axpy,reduction,row-wise-reduction,matrix-product-float32', type=str)
tune_parser.add_argument("--gemm-layouts", default='NN,NT,TN,TT', type=str)
tune_parser.add_argument("--gemv-layouts", default='N,T', type=str)
tune_parser.add_argument("--json-file", default='', type=str)
tune_parser.add_argument("--viennacl-src-path", default='', type=str)
tune_subparsers = tune_parser.add_subparsers(dest='method')
simple_parser = tune_subparsers.add_parser('simple', help = 'Tune each operation for unique sizes')
simple_parser.add_argument("--blas1-size", default = 10e6, type=int)
simple_parser.add_argument("--blas2-size", nargs=2, default=[2560,2560], type=int)
simple_parser.add_argument("--blas3-size", nargs=3, default=[1536,1536,1536],type=int)
full_parser = tune_subparsers.add_parser('full', help = 'Tune each operation for randomly chosen sizes')
full_parser.add_argument("--build-model", default=True, type=bool)
full_parser.add_argument("--sample-size", default=30, type=int)
args = parser.parse_args()
self.__dict__ = args.__dict__.copy()
self.device = add_input('Device to tune for','0')
self.exclude_operations = add_input('Operations to exclude','vector-axpy,matrix-axpy,reduction,row-wise-reduction,matrix-product-float64').split(',')
self.gemm_layouts = '' if 'matrix-product' in self.exclude_operations else add_input('GEMV Layouts', 'NN,NT,TN,TT')
self.gemv_layouts = '' if 'row-wise-reduction' in self.exclude_operations else add_input('GEMV Layouts', 'N,T')
self.json_file = add_input('JSON File', misc_tools.sanitize_string(devices[int(self.device)].name) + '.json')
self.method = add_input('Tuning type', 'simple')
if self.method == 'simple':
self.blas1_size = add_input('BLAS1 size', '10e6')
self.blas2_size = add_input('BLAS2 sizes (M,N)', '2560,2560').split(',')
self.blas3_size = add_input('BLAS3 sizes (M,N,K)', '1024,1024,1024').split(',')
self.build_model = True
self.sample_size = 30
#Retypes
self.device = devices[int(self.device)]

View File

@@ -7,6 +7,7 @@ import sys
import pyopencl as cl
import pyviennacl as vcl
import numpy as np
class PhysicalLimitsNV:
def __init__(self, dev):
@@ -196,15 +197,15 @@ def benchmark(template, statement, device):
else:
template.execute(statement, True)
statement.result.context.finish_all_queues()
N = 0
current_time = 0
timings = []
while current_time < 1e-1:
time_before = time.time()
template.execute(statement,False)
statement.result.context.finish_all_queues()
current_time = current_time + time.time() - time_before
N+=1
return current_time/N
timings.append(time.time() - time_before)
current_time = current_time + timings[-1]
return np.median(timings)
def sanitize_string(string, keep_chars = ['_']):

View File

@@ -13,7 +13,12 @@ def gmean(a, axis=0, dtype=None):
else:
log_a = np.log(a)
return np.exp(log_a.mean(axis=axis))
def nrmse(y_ground, y):
    """Return the normalized root-mean-square error between y and y_ground.

    The root-mean-square deviation of ``y`` from ``y_ground`` is divided by
    the range (max - min) of ``y_ground``.

    Parameters:
        y_ground: reference values (array or any array-like of numbers).
        y: predicted values, same shape as ``y_ground``.

    Returns:
        The normalized error as a float. When ``y_ground`` is constant the
        range is zero and the ratio is undefined; in that case 0.0 is
        returned for a perfect prediction and ``inf`` otherwise.

    Raises:
        ValueError: if either input is empty.
    """
    # Coerce to float arrays: accepts plain sequences and keeps the division
    # below a true division even for integer inputs under Python 2.
    y_ground = np.asarray(y_ground, dtype=float)
    y = np.asarray(y, dtype=float)
    if y_ground.size == 0 or y.size == 0:
        raise ValueError("nrmse() requires non-empty inputs")
    rmsd = np.sqrt(np.sum((y_ground - y)**2)/y.size)
    spread = np.max(y_ground) - np.min(y_ground)
    if spread == 0:
        # Avoid 0/0 -> NaN (and the divide warning): a NaN score would make
        # any later min()/comparison over scores unreliable.
        return 0.0 if rmsd == 0 else float('inf')
    return float(rmsd/spread)
def train_model(X, Y, profiles, metric):
#Shuffle
p = np.random.permutation(X.shape[0])
@@ -22,24 +27,19 @@ def train_model(X, Y, profiles, metric):
#Normalize
Ymax = np.max(Y)
Y = Y/Ymax
#Train the model
cut = int(0.75*X.shape[0])
clf = ensemble.RandomForestRegressor(10, max_depth=3).fit(X[:cut,:], Y[:cut,:])
cut = int(0.9*X.shape[0])
nrmses = {}
for depth in range(1,10):
clf = ensemble.RandomForestRegressor(5, max_depth=4).fit(X[:cut,:], Y[:cut,:])
t = np.argmin(clf.predict(X[cut:,:]), axis = 1)
y = np.array([Y[cut+i,t[i]] for i in range(t.size)])
y_ground = np.min(Y[cut:,:], axis=1)
# for i in range(t.size):
# print X[cut+i,:], y[i], y_ground[i]
nrmses[clf] = nrmse(y_ground, y)
print depth, nrmses[clf]
t = np.argmin(clf.predict(X[cut:,:]), axis = 1)
s = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], t)])
tt = np.argmin(Y[cut:,:], axis = 1)
ss = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], tt)])
p5 = lambda a: np.percentile(a, 5)
p25 = lambda a: np.percentile(a, 25)
p50 = lambda a: np.percentile(a, 50)
p75 = lambda a: np.percentile(a, 75)
p95 = lambda a: np.percentile(a, 95)
print("Percentile :\t 5 \t 25 \t 50 \t 75 \t 95")
print("Testing speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(s), p25(s), p50(s), p75(s), p95(s)))
print("Optimal speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(ss), p25(ss), p50(ss), p75(ss), p95(ss)))
clf = min(nrmses, key=nrmses.get)
return clf