diff --git a/python/autotune/external/config.ini b/python/autotune/external/config.ini
index 65053f508..faffd5eb7 100644
--- a/python/autotune/external/config.ini
+++ b/python/autotune/external/config.ini
@@ -1,28 +1,23 @@
-viennacl-src-root = /home/philippe/Development/viennacl-dev/viennacl/
+#~ viennacl-src-root = /home/philippe/Development/viennacl-dev/viennacl/
 
 [vector-axpy]
-devices = 0
-precision = single, double
-size = 5000000
+precision = single
+#~ size = 5000000
 
-[reduction]
-devices = 0
-precision = single, double
-size = 5000000
-
-[matrix-axpy]
-devices = 0
-precision = single, double
-size = 2560, 2560
-
-[row-wise-reduction]
-devices = 0
-precision = single, double
-layout = N,T
-size = 2560, 2560
-
-[matrix-product]
-devices = 0
-precision = single, double
-layout = NN,NT,TN,TT
-size = 1536, 1536, 1536
+#~ [reduction]
+#~ precision = single, double
+#~ size = 5000000
+#~
+#~ [matrix-axpy]
+#~ precision = single, double
+#~ size = 2560, 2560
+#~
+#~ [row-wise-reduction]
+#~ precision = single, double
+#~ layout = N,T
+#~ size = 2560, 2560
+#~
+#~ [matrix-product]
+#~ precision = single, double
+#~ layout = NN,NT,TN,TT
+#~ size = 1536, 1536, 1536
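The one remaining live section drives a single operation at single precision; every other section, and the per-section `devices` keys, are commented out with `#~`, since device selection now comes from the command line (see autotune.py below). As a reminder of why autotune.py needs its `map_to_list` helper: ConfigObj returns a bare string for a single value but a list of strings for comma-separated ones. A minimal sketch (hypothetical session against the file above):

    from configobj import ConfigObj

    config = ConfigObj('config.ini')
    p = config['vector-axpy']
    print(p['precision'])    # 'single' (a bare string, not a list)
    # With "precision = single, double" this would be ['single', 'double'];
    # map_to_list(str, x) wraps the scalar case so both forms iterate uniformly.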
diff --git a/python/autotune/pysrc/autotune.py b/python/autotune/pysrc/autotune.py
index 31455f912..5009c3c59 100644
--- a/python/autotune/pysrc/autotune.py
+++ b/python/autotune/pysrc/autotune.py
@@ -1,18 +1,19 @@
 from __future__ import division
 
-import argparse, itertools, os, sys
+import argparse, itertools, os, sys, json
 import misc_tools, optimize
 import pyopencl as cl
 import pyviennacl as vcl
 import pyatidlas as atd
-
+import numpy as np
 from configobj import ConfigObj
 from numpy import random
 
 from dataset import generate_dataset
 from model import train_model
 
+
 DATATYPES = { 'single' : vcl.float32,
               'double' : vcl.float64 }
 
@@ -36,34 +37,34 @@ TYPES = { 'vector-axpy': {'template':atd.VectorAxpyTemplate,
                               'perf-index': lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9,
                               'perf-measure': 'GFLOP/s'} }
 
-def do_tuning(config_fname, viennacl_root):
+
+def do_tuning(config_fname, viennacl_root, device):
+    json_out = {}
     config = ConfigObj(config_fname)
+
     def map_to_list(T, x):
         return list(map(T, x if isinstance(x, list) else [x]))
+
     for operation in ['vector-axpy', 'matrix-axpy', 'reduction', 'row-wise-reduction', 'matrix-product']:
+
         if operation in config:
             p = config[operation]
-            confdevices = p['devices']
-            all_devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
-            DEVICES_PRESETS = {'all': all_devices,
-                               'gpus': [d for d in all_devices if d.type==cl.device_type.GPU],
-                               'cpus': [d for d in all_devices if d.type==cl.device_type.CPU],
-                               'accelerators': [d for d in all_devices if d.type==cl.device_type.ACCELERATOR]
-                               }
-            devices = DEVICES_PRESETS[confdevices] if confdevices in DEVICES_PRESETS else [all_devices[int(i)] for i in confdevices]
             precisions = map_to_list(str, p['precision'])
             if 'all' in precisions:
                 precisions = ['single','double']
             datatypes = [DATATYPES[k] for k in precisions]
-            #Iterate through the datatypes and the devices
-            for datatype, device in itertools.product(datatypes, devices):
+
+            #Iterate through the datatypes
+            for datatype in datatypes:
+                ctx = cl.Context([device])
                 ctx = vcl.backend.Context(ctx)
-                device = ctx.current_device
+
                 #Check data-type
                 if datatype is vcl.float64 and not device.double_fp_config:
                     sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
                     continue
+
                 #Helper for execution
                 def execute(device, node, other_params, sizes, fname = os.devnull, parameters = None):
                     with vcl.Statement(node) as statement:
@@ -75,6 +76,7 @@ def do_tuning(config_fname, viennacl_root):
                         with open(fname, "w+") as archive:
                             return optimize.genetic(statement, device, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
                                                     lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
+
                 #Helper for tuning
                 def tune(execution_handler, nTuning, nDataPoints, draw, additional_parameters):
                     if 'size' in p:
@@ -85,7 +87,20 @@ def do_tuning(config_fname, viennacl_root):
                     def compute_perf(x, t):
                         return TYPES[operation]['perf-index']([datatype().itemsize, x, t])
                     X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler, nTuning, nDataPoints, draw)
-                    train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
+                    clf = train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
+
+                    #Update JSON
+                    full_operation = operation + ''.join(additional_parameters)
+                    if full_operation not in json_out:
+                        json_out[full_operation] = {}
+                    json_out[full_operation][datatype.__name__] = {}
+                    D = json_out[full_operation][datatype.__name__]
+                    D['profiles'] = [ prof.astype('int').tolist() for prof in profiles]
+                    D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
+                                       'children_right': e.tree_.children_right.tolist(),
+                                       'threshold': e.tree_.threshold.astype('float32').tolist(),
+                                       'feature': e.tree_.feature.astype('float32').tolist(),
+                                       'value': e.tree_.value[:,:,0].astype('float32').tolist()} for e in clf.estimators_]
 
                 #Vector AXPY
                 if operation=='vector-axpy':
@@ -143,6 +158,10 @@ def do_tuning(config_fname, viennacl_root):
                         return execute(device, vcl.Assign(C,LHS*RHS*alpha + C*beta),(A_trans, B_trans), sizes, fname, parameters)
                     tune(execution_handler, 50, 2000, lambda : 64*np.random.randint(low=1, high=40, size=3),(layout[0], layout[1]))
 
+    dname = misc_tools.sanitize_string(device.name)
+    json_out["version"] = "1.0"
+    json.dump(json_out, open(dname + '.json','w'))
+
 
 if __name__ == "__main__":
@@ -151,14 +170,15 @@ if __name__ == "__main__":
     print_devices_parser = subparsers.add_parser('list-devices', help='list the devices available')
     tune_parser = subparsers.add_parser('tune', help='tune using a specific configuration file')
     tune_parser.add_argument("--config", default="config.ini", required=False, type=str)
+    tune_parser.add_argument("--device", default=0, required=False, type=int)
     tune_parser.add_argument("--viennacl-root", default='', required=False, type=str)
     args = parser.parse_args()
 
+    devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
     if(args.action=='list-devices'):
         print("----------------")
         print("Devices available:")
         print("----------------")
-        devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
         for (i, d) in enumerate(devices):
             print 'Device', i, '|', cl.device_type.to_string(d.type), '|', d.name, 'on', d.platform.name
         print("----------------")
@@ -166,4 +186,4 @@ if __name__ == "__main__":
         print("------")
         print("Auto-tuning")
         print("------")
-        do_tuning(args.config, args.viennacl_root)
+        do_tuning(args.config, args.viennacl_root, devices[args.device])
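The `predictor` entry serializes each estimator of the returned RandomForestRegressor in scikit-learn's flat array layout: node i is a leaf iff children_left[i] == -1, and value[i] holds one (normalized) predicted runtime per profile. A minimal sketch of how a consumer could evaluate one serialized tree; the file name and the operation/dtype keys are illustrative only:

    import json

    with open('some_device.json') as f:
        data = json.load(f)
    tree = data['vector-axpy']['float32']['predictor'][0]

    def predict_tree(tree, x):
        node = 0
        while tree['children_left'][node] != -1:   # -1 marks a leaf
            # 'feature' was stored as float32, hence the int() cast
            if x[int(tree['feature'][node])] <= tree['threshold'][node]:
                node = tree['children_left'][node]
            else:
                node = tree['children_right'][node]
        return tree['value'][node]   # one regressed runtime per profile

    # Averaging predict_tree over every entry of 'predictor' reproduces the
    # forest's prediction; the argmin of that average selects the profile.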
diff --git a/python/autotune/pysrc/dataset.py b/python/autotune/pysrc/dataset.py
index 376a97af6..c06476217 100644
--- a/python/autotune/pysrc/dataset.py
+++ b/python/autotune/pysrc/dataset.py
@@ -15,48 +15,53 @@ def resample(X, draw):
 
 def generate_dataset(TemplateType, execution_handler, nTuning, nDataPoints, draw):
-    print "Getting some good profiles..."
-    nDim = draw().size
-    X = np.empty((nTuning, nDim))
-    t = np.empty(nTuning)
-    profiles = []
-    for i in range(nTuning):
-        x = resample(X, draw)
-        y = execution_handler(x)
-        if y not in profiles:
-            profiles.append(y)
-        idx = profiles.index(y)
-        X[i,:] = x
-        t[i] = idx
-
-    print "Generating the dataset..."
-    Y = np.empty((nDataPoints, len(profiles)))
-    X = np.empty((nDataPoints, nDim))
-    t = []
-
-    for i in range(nDataPoints):
-        x = resample(X, draw)
-        for j,y in enumerate(profiles):
-            T = execution_handler(x, os.devnull, y)
-            Y[i,j] = T
-        idx = np.argmax(Y[i,:])
-        X[i,:] = x
-        t = np.argmax(Y[:i+1,], axis=1)
-        if i%10==0:
-            sys.stdout.write('%d data points generated\r'%i)
-            sys.stdout.flush()
+    # print "Getting some good profiles..."
+    # nDim = draw().size
+    # X = np.empty((nTuning, nDim))
+    # t = np.empty(nTuning)
+    # profiles = []
+    # for i in range(nTuning):
+    #     x = resample(X, draw)
+    #     y = execution_handler(x)
+    #     if y not in profiles:
+    #         profiles.append(y)
+    #     idx = profiles.index(y)
+    #     X[i,:] = x
+    #     t[i] = idx
+    #
+    # print "Generating the dataset..."
+    # Y = np.empty((nDataPoints, len(profiles)))
+    # X = np.empty((nDataPoints, nDim))
+    # t = []
+    #
+    # for i in range(nDataPoints):
+    #     x = resample(X, draw)
+    #     for j,y in enumerate(profiles):
+    #         T = execution_handler(x, os.devnull, y)
+    #         Y[i,j] = T
+    #     idx = np.argmax(Y[i,:])
+    #     X[i,:] = x
+    #     t = np.argmax(Y[:i+1,], axis=1)
+    #     if i%10==0:
+    #         sys.stdout.write('%d data points generated\r'%i)
+    #         sys.stdout.flush()
 
     template_name = TemplateType.__name__
     dir = os.path.join("data", template_name)
     if not os.path.exists(dir):
         os.makedirs(dir)
-    np.savetxt(os.path.join(dir,"profiles.csv"), profiles)
-    np.savetxt(os.path.join(dir,"X.csv"), X)
-    np.savetxt(os.path.join(dir,"Y.csv"), Y)
+    # np.savetxt(os.path.join(dir,"profiles.csv"), profiles)
+    # np.savetxt(os.path.join(dir,"X.csv"), X)
+    # np.savetxt(os.path.join(dir,"Y.csv"), Y)
 
     profiles = np.loadtxt(os.path.join(dir, "profiles.csv"))
    X = np.loadtxt(os.path.join(dir, "X.csv"),ndmin=2)
     Y = np.loadtxt(os.path.join(dir, "Y.csv"),ndmin=2)
 
+    #idx = np.argsort(np.bincount(np.argmin(Y, axis=1)))
+    idx = np.argsort(Y[np.argmax(X),:])
+    Y = Y[:, idx]
+    profiles = profiles[idx]
+
     return X, Y, profiles
diff --git a/python/autotune/pysrc/misc_tools.py b/python/autotune/pysrc/misc_tools.py
index bed1fd7d9..001b88ba7 100644
--- a/python/autotune/pysrc/misc_tools.py
+++ b/python/autotune/pysrc/misc_tools.py
@@ -207,13 +207,13 @@ def benchmark(template, statement, device):
     return current_time/N
 
+def sanitize_string(string, keep_chars = ['_']):
+    string = string.replace(' ', '_').replace('-', '_').lower()
+    string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
+    return string
+
 def update_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters):
-    def sanitize_string(string, keep_chars = ['_']):
-        string = string.replace(' ', '_').replace('-', '_').lower()
-        string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
-        return string
-
     def append_include(data, path):
         include_name = '#include "' + path +'"\n'
         already_included = data.find(include_name)
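sanitize_string is promoted to module scope so that autotune.py can reuse it to derive the output file name (`<sanitized device name>.json`). For illustration, with a hypothetical device name:

    import misc_tools

    print(misc_tools.sanitize_string('Tesla C2050'))   # -> 'tesla_c2050'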
diff --git a/python/autotune/pysrc/model.py b/python/autotune/pysrc/model.py
index f43d3a9f4..eaad42605 100644
--- a/python/autotune/pysrc/model.py
+++ b/python/autotune/pysrc/model.py
@@ -1,7 +1,6 @@
 from sklearn import tree
 from sklearn import ensemble
-from numpy import array, bincount, mean, std, max, argmax, min, argmin, median
-
+import numpy as np
 
 def gmean(a, axis=0, dtype=None):
     if not isinstance(a, np.ndarray): # if not an ndarray object attempt to convert it
@@ -16,25 +15,30 @@ def gmean(a, axis=0, dtype=None):
     return np.exp(log_a.mean(axis=axis))
 
 def train_model(X, Y, profiles, metric):
-    print("Building the model...")
-
-    Xmean = mean(X)
-    Xstd = std(X)
-    X = (X - Xmean)/Xstd
-
-    Y = Y[:, :]
-    Ymax = max(Y)
+    Y=Y[:,:]
+    profiles=profiles[:]
+    Ymax = np.max(Y)
     Y = Y/Ymax
-    ref = argmax(bincount(argmin(Y, axis=1))) #most common profile
-    cut = int(0.800*X.shape[0]+1)
-    #Train the model
-    clf = ensemble.RandomForestRegressor(10, max_depth=10).fit(X[:cut,:], Y[:cut,:])
+    cut = int(0.75*X.shape[0])
+    clf = ensemble.RandomForestRegressor(10, max_depth=4).fit(X[:cut,:], Y[:cut,:])
 
-    t = argmin(clf.predict(X[cut:,:]), axis = 1)
-    s = array([y[ref]/y[k] for y,k in zip(Y[cut:,:], t)])
-    tt = argmin(Y[cut:,:], axis = 1)
-    ss = array([y[ref]/y[k] for y,k in zip(Y[cut:,:], tt)])
-    print("Testing speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%(gmean(s), median(s), min(s), max(s)))
-    print("Optimal speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%(gmean(ss), median(ss), min(ss), max(ss)))
+    t = np.argmin(clf.predict(X[cut:,:]), axis = 1)
+    s = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], t)])
+    tt = np.argmin(Y[cut:,:], axis = 1)
+    ss = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], tt)])
+
+    p5 = lambda a: np.percentile(a, 5)
+    p25 = lambda a: np.percentile(a, 25)
+    p50 = lambda a: np.percentile(a, 50)
+    p75 = lambda a: np.percentile(a, 75)
+    p95 = lambda a: np.percentile(a, 95)
+
+    print("Percentile :\t 5 \t 25 \t 50 \t 75 \t 95")
+    print("Testing speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.2f"%(p5(s), p25(s), p50(s), p75(s), p95(s)))
+    print("Optimal speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.2f"%(p5(ss), p25(ss), p50(ss), p75(ss), p95(ss)))
+
+    return clf
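Note that speedups are now computed against column 0 instead of the most common profile: after the argsort added in dataset.py, column 0 is the profile that ran fastest on the largest input (for the single-column vector-axpy feature matrix), and train_model now returns the fitted forest so autotune.py can serialize it. A synthetic sketch of the reported metric, with illustrative shapes and random stand-ins for real timings:

    import numpy as np

    Y = np.random.rand(200, 5)                # runtimes: 200 sizes x 5 profiles
    t = np.random.randint(0, 5, size=200)     # profiles a predictor might pick
    s = np.array([y[0]/y[k] for y, k in zip(Y, t)])   # speedup over profile 0
    print("median speedup: %.2f" % np.percentile(s, 50))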