Prettier command line interface

Rename the 'unique' tuning method to 'simple' with per-BLAS-level size flags, add a 'full' method (--sample-size, --build-model), split dataset generation into dataset.sample_profiles / dataset.sample_dataset, and drop the commented-out exhaustive-search code from the optimizer.
@@ -1,8 +1,7 @@
 from __future__ import division

 import argparse, itertools, os, sys, json
-import misc_tools, optimize
-
+import misc_tools, optimize, dataset
 import pyopencl as cl
 import pyviennacl as vcl
 import pyatidlas as atd
@@ -10,7 +9,6 @@ import numpy as np

 from configobj import ConfigObj
 from numpy import random
-from dataset import generate_dataset
 from model import train_model

@@ -42,10 +40,10 @@ def do_tuning(args, devices):
     def map_to_list(T, x):
         return list(map(T, x if isinstance(x, list) else [x]))

-    if(args.method=='unique'):
-        default_tuning_sizes = {'vector-axpy': tuple(args.sizes[:1]), 'reduction': tuple(args.sizes[:1]),
-                                'matrix-axpy' : tuple(args.sizes[1:3]), 'row-wise-reduction' : tuple(args.sizes[1:3]),
-                                'matrix-product': tuple(args.sizes[3:])}
+    if(args.method=='simple'):
+        default_tuning_sizes = {'vector-axpy': [args.blas1_size], 'reduction': [args.blas1_size],
+                                'matrix-axpy' : args.blas2_size, 'row-wise-reduction' : args.blas2_size,
+                                'matrix-product': args.blas3_size}
     for operation in ['vector-axpy', 'matrix-axpy', 'reduction', 'row-wise-reduction', 'matrix-product']:

         #Iterate through the datatypes
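Note: the old 'unique' method packed 1 + 2 + 3 sizes into a single --sizes list and sliced it per operation; the new 'simple' method names the sizes per BLAS level. A minimal sketch of the equivalence, using the parser defaults that appear further down in this diff:

    sizes = [10e6, 2560, 2560, 1536, 1536, 1536]
    blas1_size = sizes[:1]    # [10000000.0]
    blas2_size = sizes[1:3]   # [2560, 2560]
    blas3_size = sizes[3:]    # [1536, 1536, 1536]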
@@ -72,7 +70,7 @@ def do_tuning(args, devices):
                             lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)

            #Helper for tuning
-           def tune(execution_handler, nTuning, nDataPoints, draw, additional_parameters):
+           def tune(execution_handler, n_datapoints, sampler, additional_parameters):
                #Update JSON
                full_operation = operation + ''.join(additional_parameters)
                if full_operation not in json_out:
@@ -80,20 +78,22 @@ def do_tuning(args, devices):
                    json_out[full_operation][datatype.__name__] = {}
                D = json_out[full_operation][datatype.__name__]

-               if args.method == 'unique':
+               if args.method == 'simple':
                    profiles = [execution_handler(map(int,default_tuning_sizes[operation]))]
                    if args.viennacl_src_path:
                        misc_tools.update_viennacl_headers(args.viennacl_src_path,device,datatype,operation,additional_parameters,profiles[0])
                else:
                    def compute_perf(x, t):
                        return TYPES[operation]['perf-index']([datatype().itemsize, x, t])
-                   X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler, nTuning, nDataPoints, draw)
-                   clf = train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
-                   D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
-                                      'children_right': e.tree_.children_right.tolist(),
-                                      'threshold': e.tree_.threshold.astype('float32').tolist(),
-                                      'feature': e.tree_.feature.astype('float32').tolist(),
-                                      'value': e.tree_.value[:,:,0].astype('float32').tolist()} for e in clf.estimators_]
+                   profiles = dataset.sample_profiles(execution_handler, args.sample_size, sampler)
+                   if args.build_model:
+                       X, Y = dataset.sample_dataset(os.path.join(full_operation,datatype.__name__), profiles, execution_handler, n_datapoints, sampler)
+                       clf = train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
+                       D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
+                                          'children_right': e.tree_.children_right.tolist(),
+                                          'threshold': e.tree_.threshold.astype('float32').tolist(),
+                                          'feature': e.tree_.feature.astype('float32').tolist(),
+                                          'value': e.tree_.value[:,:,0].astype('float32').tolist()} for e in clf.estimators_]
+                   if args.viennacl_src_path:
+                       misc_tools.update_viennacl_headers(args.viennacl_src_path,device,datatype,operation,additional_parameters,profiles[0])
                D['profiles'] = [ prof.astype('int').tolist() for prof in profiles]

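Note: the 'predictor' entry serializes the node arrays of each scikit-learn tree (children_left, children_right, threshold, feature, value); leaf nodes carry -1 in both child arrays. A hedged sketch, on a toy single-split tree, of how a consumer could evaluate one serialized tree:

    def predict(tree, x):
        node = 0
        # descend until both children are -1 (a leaf)
        while tree['children_left'][node] != -1:
            if x[int(tree['feature'][node])] <= tree['threshold'][node]:
                node = tree['children_left'][node]
            else:
                node = tree['children_right'][node]
        return tree['value'][node]

    tree = {'children_left': [1, -1, -1], 'children_right': [2, -1, -1],
            'threshold': [5.0, -2.0, -2.0], 'feature': [0.0, -2.0, -2.0],
            'value': [[0.0], [1.0], [2.0]]}
    print(predict(tree, [3.0]))  # [1.0]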
@@ -104,7 +104,7 @@ def do_tuning(args, devices):
                        y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
                        z = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
                        return execute(device, vcl.Assign(z, vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y))), (), sizes, fname, parameters)
-                   tune(execution_handler, 30, 1000, lambda : 64*np.random.randint(low=10, high=100000, size=1), ())
+                   tune(execution_handler, 1000, lambda : 64*np.random.randint(low=10, high=100000, size=1), ())
                #Reduction
                if operation=='reduction':
                    def execution_handler(sizes, fname=os.devnull, parameters=None):
@@ -112,7 +112,7 @@ def do_tuning(args, devices):
                        y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
                        s = vcl.Scalar(0, context=ctx, dtype=datatype)
                        return execute(device, vcl.Assign(s, vcl.Dot(x,y)), (), sizes, fname, parameters)
-                   tune(execution_handler, 30, 1000, lambda : 64*np.random.randint(low=10, high=100000, size=1), ())
+                   tune(execution_handler, 1000, lambda : 64*np.random.randint(low=10, high=100000, size=1), ())
                #Matrix AXPY
                if operation=='matrix-axpy':
                    def execution_handler(sizes, fname=os.devnull, parameters=None):
@@ -120,7 +120,7 @@ def do_tuning(args, devices):
                        B = vcl.Matrix(sizes, context=ctx, dtype=datatype)
                        C = vcl.Matrix(sizes, context=ctx, dtype=datatype)
                        return execute(device, vcl.Assign(C,A+B), (), sizes, fname, parameters)
-                   tune(execution_handler, 30, 1000, lambda : 64*np.random.randint(low=5, high=100, size=2), ())
+                   tune(execution_handler, 1000, lambda : 64*np.random.randint(low=5, high=100, size=2), ())
                #Row-wise reduction
                if operation=='row-wise-reduction':
                    layouts = ['N', 'T']
@@ -131,7 +131,7 @@ def do_tuning(args, devices):
                            y = vcl.Vector(sizes[0] if A_trans=='N' else sizes[1], context=ctx, dtype=datatype)
                            LHS = A if A_trans=='N' else A.T
                            return execute(device, vcl.Assign(y, LHS*x), (), sizes, fname, parameters)
-                       tune(execution_handler, 30, 1000, lambda : 64*np.random.randint(low=5, high=100, size=2), (A_trans,))
+                       tune(execution_handler, 1000, lambda : 64*np.random.randint(low=5, high=100, size=2), (A_trans,))
                #Matrix Product
                if operation=='matrix-product':
                    layouts = ['NN', 'NT', 'TN', 'TT']
@@ -147,7 +147,7 @@ def do_tuning(args, devices):
                            beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
                            C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
                            return execute(device, vcl.Assign(C,LHS*RHS*alpha + C*beta),(A_trans, B_trans), sizes, fname, parameters)
-                       tune(execution_handler, 30, 1000, lambda : 64*np.random.randint(low=1, high=40, size=3),(layout[0], layout[1]))
+                       tune(execution_handler, 1000, lambda : 64*np.random.randint(low=1, high=40, size=3),(layout[0], layout[1]))

    dname = misc_tools.sanitize_string(device.name)
    json_out["version"] = "1.0"
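Note: every sampler draws problem sizes as multiples of 64 through np.random.randint, whose upper bound is exclusive; the matrix-product sampler above therefore yields (M, N, K) triples between 64 and 64*39 = 2496. A quick illustration:

    import numpy as np
    sampler = lambda: 64 * np.random.randint(low=1, high=40, size=3)
    print(sampler())  # e.g. [ 448 1920  832]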
@@ -161,14 +161,18 @@ if __name__ == "__main__":
    print_devices_parser = subparsers.add_parser('list-devices', help='list the devices available')
    tune_parser = subparsers.add_parser('tune', help='tune using a specific configuration file')
    tune_parser.add_argument("--device", default=0, required=False, type=str)
+   tune_parser.add_argument("--viennacl-src-path", default='', type=str)

    tune_subparsers = tune_parser.add_subparsers(dest='method')
-   big_sizes_parser = tune_subparsers.add_parser('unique', help = 'Tune each operation for unique sizes')
-   big_sizes_parser.add_argument("--sizes", nargs='+', default=[10e6,2560,2560,1536,1536,1536], required=False, type=int, help = '6 = 1 + 2 + 3 sizes for respectively BLAS1, BLAS2, BLAS3')
-   big_sizes_parser.add_argument("--viennacl-src-path", default='', required=False, type=str)
+   simple_parser = tune_subparsers.add_parser('simple', help = 'Tune each operation for unique sizes')

-   model_parser = tune_subparsers.add_parser('build-model', help = 'Build an input-dependent model')
+   simple_parser.add_argument("--blas1-size", default = 10e6, type=int)
+   simple_parser.add_argument("--blas2-size", nargs=2, default=[2560,2560], type=int)
+   simple_parser.add_argument("--blas3-size", nargs=3, default=[1536,1536,1536],type=int)

+   full_parser = tune_subparsers.add_parser('full', help = 'Tune each operation for randomly chosen sizes')
+   full_parser.add_argument("--build-model", default=False, type=bool)
+   full_parser.add_argument("--sample-size", default=30, type=int)

    args = parser.parse_args()
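Note: with the reworked parser, 'simple' and 'full' become subcommands of 'tune'. Example invocations, assuming the script file is named tune.py (the filename is not shown in this diff):

    python tune.py list-devices
    python tune.py tune --device 0 simple --blas3-size 1536 1536 1536
    python tune.py tune --device 0 full --sample-size 30 --build-model True

One caveat: argparse's type=bool treats any non-empty string as True, so '--build-model False' would still enable model building.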
@@ -4,64 +4,65 @@ import re
 import random
 import numpy as np

-def resample(X, draw):
+def resample(X, sampler):
     Xtuples = [tuple(x) for x in X]
     r = random.random()
     while(True):
-        x = draw()
+        x = sampler()
         if tuple(x) not in Xtuples:
             break
     return x.astype(int)

-def generate_dataset(TemplateType, execution_handler, nTuning, nDataPoints, draw):
+def sample_profiles(execution_handler, nTuning, sampler):
+    print "Sampling profiles..."
+    nDim = sampler().size
+    X = np.empty((nTuning, nDim))
+    t = np.empty(nTuning)
+    profiles = []
+    for i in range(nTuning):
+        x = resample(X, sampler)
+        y = execution_handler(x)
+        if y not in profiles:
+            profiles.append(y)
+        idx = profiles.index(y)
+        X[i,:] = x
+        t[i] = idx

-    # print "Getting some good profiles..."
-    # nDim = draw().size
-    # X = np.empty((nTuning, nDim))
-    # t = np.empty(nTuning)
-    # profiles = []
-    # for i in range(nTuning):
-    #     x = resample(X, draw)
-    #     y = execution_handler(x)
-    #     if y not in profiles:
-    #         profiles.append(y)
-    #     idx = profiles.index(y)
-    #     X[i,:] = x
-    #     t[i] = idx
-    #
-    # print "Generating the dataset..."
-    # Y = np.empty((nDataPoints, len(profiles)))
-    # X = np.empty((nDataPoints, nDim))
-    # t = []
-    #
-    # for i in range(nDataPoints):
-    #     x = resample(X, draw)
-    #     for j,y in enumerate(profiles):
-    #         T = execution_handler(x, os.devnull, y)
-    #         Y[i,j] = T
-    #     idx = np.argmax(Y[i,:])
-    #     X[i,:] = x
-    #     t = np.argmax(Y[:i+1,], axis=1)
-    #     if i%10==0:
-    #         sys.stdout.write('%d data points generated\r'%i)
-    #         sys.stdout.flush()
+    idx = int(t[np.argmax(np.linalg.norm(X, axis=1))])
+    profiles = np.array([profiles[idx]] + [x for i,x in enumerate(profiles) if i!=idx])
+    return profiles

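Note: resample() rejection-samples until it draws a point that is not already in X, and sample_profiles() then reorders the profile list so that the profile that won at the largest sampled size (the row of X with the largest norm) comes first. A self-contained toy illustration of that reordering:

    import numpy as np
    t = np.array([0, 1, 1, 2])                    # winning profile index per sample
    X = np.array([[64], [640], [6400], [64000]])  # sampled sizes
    profiles = ['small', 'medium', 'large']
    idx = int(t[np.argmax(np.linalg.norm(X, axis=1))])
    profiles = [profiles[idx]] + [p for i, p in enumerate(profiles) if i != idx]
    print(profiles)  # ['large', 'small', 'medium']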
-    template_name = TemplateType.__name__
-    dir = os.path.join("data", template_name)
-    if not os.path.exists(dir):
-        os.makedirs(dir)
+def sample_dataset(prefix_name, profiles, execution_handler, nDataPoints, sampler):

-    # np.savetxt(os.path.join(dir,"profiles.csv"), profiles)
-    # np.savetxt(os.path.join(dir,"X.csv"), X)
-    # np.savetxt(os.path.join(dir,"Y.csv"), Y)
+    print "Generating the dataset..."
+    Y = np.empty((nDataPoints, len(profiles)))
+    X = np.empty((nDataPoints, len(profiles[0])))
+    t = []

-    profiles = np.loadtxt(os.path.join(dir, "profiles.csv"))
-    X = np.loadtxt(os.path.join(dir, "X.csv"),ndmin=2)
-    Y = np.loadtxt(os.path.join(dir, "Y.csv"),ndmin=2)
+    for i in range(nDataPoints):
+        x = resample(X, sampler)
+        for j,y in enumerate(profiles):
+            T = execution_handler(x, os.devnull, y)
+            Y[i,j] = T
+        idx = np.argmax(Y[i,:])
+        X[i,:] = x
+        t = np.argmax(Y[:i+1,], axis=1)
+        if i%10==0:
+            sys.stdout.write('%d data points generated\r'%i)
+            sys.stdout.flush()

+    #idx = np.argsort(np.bincount(np.argmin(Y, axis=1)))
+    idx = np.argsort(Y[np.argmax(X),:])
+    Y = Y[:, idx]
+    profiles = profiles[idx]

+    dir = os.path.join("data", prefix_name)
+    if not os.path.exists(dir):
+        os.makedirs(dir)
+    np.savetxt(os.path.join(dir,"X.csv"), X)
+    np.savetxt(os.path.join(dir,"Y.csv"), Y)
+    np.savetxt(os.path.join(dir,"profiles.csv"), profiles)
+    X = np.loadtxt(os.path.join(dir, "X.csv"),ndmin=2)
+    Y = np.loadtxt(os.path.join(dir, "Y.csv"),ndmin=2)
+    profiles = np.loadtxt(os.path.join(dir, "profiles.csv"))

-    return X, Y, profiles
+    return X, Y

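Note: sample_dataset() benchmarks every profile on each drawn input and relabels the whole prefix with np.argmax, so t[i] is the index of the profile that scored best on input i. This assumes execution_handler returns a performance figure (higher is better). A toy illustration of the labelling:

    import numpy as np
    Y = np.array([[10., 30., 20.],   # rows: inputs, columns: profiles
                  [25., 15., 5.]])
    t = np.argmax(Y, axis=1)
    print(t)  # [1 0]: profile 1 wins on input 0, profile 0 on input 1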
@@ -4,45 +4,6 @@ import numpy as np

 from genetic import GeneticOperators

-#~ def parameter_space(operation):
-#~     simd = [1, 2, 4, 8]
-#~     pow2_1D = [2**k for k in range(12)]
-#~     pow2_2D = [2**i for i in range(8)]
-#~     pow2_2D_unrolled = [2**i for i in range(8)]
-#~     FetchingPolicy = vcl.atidlas.FetchingPolicy
-#~     fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED]
-#~     if operation == 'vector-axpy': return [simd, pow2_1D, pow2_1D, fetch]
-#~     if operation == 'reduction': return [simd, pow2_1D, pow2_1D, fetch]
-#~     if operation == 'matrix-axpy': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D, fetch]
-#~     if operation == 'row-wise-reduction': return [simd, pow2_2D, pow2_2D, pow2_1D, fetch]
-#~     if operation == 'matrix-product': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D_unrolled, pow2_2D_unrolled, pow2_2D_unrolled, fetch, fetch, [0] + pow2_2D, [0] + pow2_2D]
-#~
-#~ def exhaustive(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out):
-#~     device = context.devices[0]
-#~     nvalid = 0
-#~     current = 0
-#~     minT = float('inf')
-#~     for individual in itertools.product(*all_parameters):
-#~         template = build_template(TemplateType.Parameters(*individual))
-#~         if not tools.skip(template, statement, device):
-#~             nvalid = nvalid + 1
-#~     for individual in itertools.product(*all_parameters):
-#~         template = build_template(TemplateType.Parameters(*individual))
-#~         try:
-#~             T = tools.benchmark(template,statement,device)
-#~             current = current + 1
-#~             if T < minT:
-#~                 minT = T
-#~                 best = individual
-#~             sys.stdout.write('%d / %d , Best is %d %s for %s\r'%(current, nvalid, compute_perf(minT), perf_metric, best))
-#~             sys.stdout.flush()
-#~         except:
-#~             pass
-#~     sys.stdout.write('\n')
-#~     sys.stdout.flush()
-#~

 def genetic(statement, device, TemplateType, build_template, compute_perf, perf_metric, out):
     GA = GeneticOperators(device, statement, TemplateType, build_template, out)
     return GA.optimize(maxtime='2m30s', maxgen=1000, compute_perf=compute_perf, perf_metric=perf_metric)
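Note: the genetic optimizer reports progress through the compute_perf/perf_metric pair handed in by the tuner. The TYPES table is not part of this diff; as a hedged sketch only, a typical perf-index for matrix product converts a runtime into GFLOP/s with the standard 2*M*N*K flop count:

    def gemm_perf_index(args):
        itemsize, (M, N, K), t = args   # t in seconds
        return 2. * M * N * K / t * 1e-9

    print(gemm_perf_index([4, (1536, 1536, 1536), 0.01]))  # ~724.8 GFLOP/s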