From 0eb56a10f0ebd3376c17f5c4a79c7aec8ab0d4df Mon Sep 17 00:00:00 2001
From: Philippe Tillet
Date: Sun, 28 Sep 2014 19:37:56 -0400
Subject: [PATCH] Simple linear model

---
 autotune/python/autotune.py | 20 ++++++++++++-----
 autotune/python/dataset.py  | 30 +++++++++++++------------
 autotune/python/model.py    | 44 +++++++++++++++++++++++++++++++++++++
 autotune/python/tools.py    | 26 +++++++++++-----------
 4 files changed, 87 insertions(+), 33 deletions(-)
 create mode 100644 autotune/python/model.py

diff --git a/autotune/python/autotune.py b/autotune/python/autotune.py
index 2292c34bf..31f270e29 100644
--- a/autotune/python/autotune.py
+++ b/autotune/python/autotune.py
@@ -12,6 +12,8 @@ from pyviennacl import backend
 from pyviennacl import opencl
 from pyviennacl import atidlas
 from dataset import generate_dataset
+from model import train_model
+import tools
 import utils
 import vclio
 
@@ -66,12 +68,11 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
           sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
           continue
       #Helper
-      def execute(node, other_params, sizes, fname = os.devnull):
+      def execute(statement, other_params, sizes, fname = os.devnull):
         print('-----')
         print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')] for sizes', sizes))))
         with open(fname, "w+") as archive:
-          with vcl.Statement(node) as statement:
-            return optimize.genetic(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
+          return optimize.genetic(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
                                   TYPES[operation]['parameter-names'], lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
       s = map_to_list((int, p['size']))
       #Vector AXPY
@@ -100,7 +101,7 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
         if 'all' in layouts:
           layouts = ['NN', 'NT', 'TN', 'TT']
         for layout in layouts:
-          def execution_handler(sizes, fname):
+          def execution_handler(sizes, fname, parameters=None):
             A_trans = layout[0]
             B_trans = layout[1]
             A = vcl.Matrix((sizes[0], sizes[1]) if A_trans=='N' else (sizes[1],sizes[0]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR);
@@ -110,8 +111,15 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
             alpha = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
             beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
             C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
-            execute(vcl.Assign(C,LHS*RHS*alpha + C*beta),(A_trans, B_trans), sizes, fname)
-          generate_dataset(operation, execution_handler)
+            statement = vcl.Statement(vcl.Assign(C,LHS*RHS*alpha + C*beta))
+            if parameters:
+              TemplateType = TYPES[operation]['template']
+              return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters),A_trans,B_trans), statement, device)
+            else:
+              execute(statement,(A_trans, B_trans), sizes, fname)
+          X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler)
+          train_model(X, Y, profiles)
+
 
 
 if __name__ == "__main__":
diff --git a/autotune/python/dataset.py b/autotune/python/dataset.py
index 9d1de872a..c4d6da4f5 100644
--- a/autotune/python/dataset.py
+++ b/autotune/python/dataset.py
@@ -1,14 +1,22 @@
 import os
+import sys
 import re
 import random
 import numpy as np
 from sklearn.neighbors.kde import KernelDensity;
+from pyviennacl.atidlas import FetchingPolicy
 
-def generate_dataset(operation, execution_handler):
-    I = 5
+def decode(y):
+    fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED]
+    y[7] = fetch[y[7]]
+    y[8] = fetch[y[8]]
+    return y
+
+def generate_dataset(TemplateType, execution_handler):
+    I = 0
     step = 64;
     max_size = 4000;
-
+
     #Retrieves the existing data
     print "Retrieving data..."
     path = "./data"
@@ -56,7 +64,6 @@
             if tuple(x) not in Xtuples:
                 break;
         x = x.astype(int)
-        x = [2048, 2048, 512]
         fname = os.path.join(path, `x[0]` +"-"+ `x[1]` +"-"+ `x[2]` +".csv")
         #Execute auto-tuning procedure
         execution_handler(x, fname)
@@ -75,11 +82,10 @@
 
         #Update density estimator p(M,N,K | t=idx)
         kdes[idx].fit(X[t[0:len(files)+i+1]==idx,:]);
-
+
     print "Exporting data...";
     #Shuffle the list of file
     files = os.listdir(path)
-    random.shuffle(files)
     X = np.empty((len(files),3))
     Y = np.zeros((len(files), len(profiles)))
     for i,fname in enumerate(files):
@@ -89,11 +95,7 @@
         A = np.loadtxt(fl,delimiter=',')
         for j,y in enumerate(profiles):
             idx = np.where(np.all(A[:,1:]==y,axis=1))[0]
-            if idx.size:
-                Y[i,j] = 2*1e-9*X[i,0]*X[i,1]*X[i,2]/A[idx[0],0]
-            else:
-                sys.exit('Data invalid! Were all the data csv files generated using the same auto-tuner options?')
-    np.savetxt(export_path+'X.csv', X)
-    np.savetxt(export_path+'Y.csv', Y)
-    np.savetxt(export_path+'profiles.csv', profiles)
-    open(export_path+'pad.csv', 'w').write(str(pad))
+            T = A[idx[0], 0] if idx.size else execution_handler(map(int,X[i,:]), '', decode(map(int, y)))
+            Y[i,j] = 2*1e-9*X[i,0]*X[i,1]*X[i,2]/T
+
+    return X, Y, profiles
diff --git a/autotune/python/model.py b/autotune/python/model.py
new file mode 100644
index 000000000..c1eb3dbca
--- /dev/null
+++ b/autotune/python/model.py
@@ -0,0 +1,44 @@
+from sklearn import *;
+from sklearn import ensemble;
+import numpy as np
+import scipy as sp
+
+def train_model(X, Y, profiles):
+    #Preprocessing
+    scaler = preprocessing.StandardScaler().fit(X);
+    X = scaler.transform(X);
+    ref = np.argmax(np.bincount(np.argmax(Y, axis=1))) #most common profile
+
+    print Y
+    print np.bincount(np.argmax(Y, axis=1))
+    #Cross-validation data-sets
+    cut = int(0.5*X.shape[0]+1);
+    XTr = X[0:cut, :];
+    YTr = Y[0:cut, :];
+    XTe = X[cut:,:];
+    YTe = Y[cut:,:];
+
+    #Train the model
+    print("Training the model...");
+    clf = linear_model.LinearRegression().fit(XTr,YTr);
+
+    #Evaluate the model
+    GFlops = np.empty(XTe.shape[0]);
+    speedups = np.empty(XTe.shape[0]);
+    optspeedups = np.empty(XTe.shape[0]);
+    for i,x in enumerate(XTe):
+        predictions = clf.predict(x);
+        label = np.argmax(predictions);
+        speedups[i] = YTe[i,label]/YTe[i,ref];
+        optspeedups[i] = np.max(YTe[i,:])/YTe[i,ref];
+        GFlops[i] = YTe[i,ref];
+
+    np.set_printoptions(precision=2);
+    print("-----------------");
+    print("Average testing speedup : %f (Optimal : %f)"%(sp.stats.gmean(speedups), sp.stats.gmean(optspeedups)));
+    print("Average GFLOP/s : %f (Default %f, Optimal %f)"%(np.mean(np.multiply(GFlops,speedups)), np.mean(GFlops), np.mean(np.multiply(GFlops,optspeedups))));
+    print("Minimum speedup is %f wrt %i GFlops"%(np.min(speedups), GFlops[np.argmin(speedups)]));
+    print("Maximum speedup is %f wrt %i GFlops"%(np.max(speedups), GFlops[np.argmax(speedups)]));
+    print("--------");
+
+    print clf
diff --git a/autotune/python/tools.py b/autotune/python/tools.py
index cd29c22b9..14a1d9b5b 100644
--- a/autotune/python/tools.py
+++ b/autotune/python/tools.py
@@ -122,17 +122,17 @@ def benchmark(template, statement, device):
     if occupancy_record.occupancy < 15 :
         raise ValueError("Template has too low occupancy")
     else:
-        try:
-            template.execute(statement, True)
+        #~ try:
+        template.execute(statement, True)
+        statement.result.context.finish_all_queues()
+        N = 0
+        current_time = 0
+        while current_time < 1e-2:
+            time_before = time.time()
+            template.execute(statement,False)
             statement.result.context.finish_all_queues()
-            N = 0
-            current_time = 0
-            while current_time < 1e-2:
-                time_before = time.time()
-                template.execute(statement,False)
-                statement.result.context.finish_all_queues()
-                current_time += time.time() - time_before
-                N+=1
-            return current_time/N
-        except:
-            raise ValueError("Invalid template")
+            current_time += time.time() - time_before
+            N+=1
+        return current_time/N
+        #~ except:
+        #~ raise ValueError("Invalid template")
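
The profile-selection scheme this patch introduces can be tried in isolation. Below is a minimal, self-contained sketch of the same idea: fit one linear regression from problem sizes to the measured GFLOP/s of every candidate profile, then, for each new problem, run whichever profile the model predicts to be fastest and compare against always running the most frequently optimal profile. The synthetic (M, N, K) sizes, the eight candidate profiles, the 50/50 train/test split, and the helper name select_profiles are all illustrative assumptions, not part of the commit.

    # Standalone sketch of the selection scheme in model.py (not part of the commit).
    # X holds (M, N, K) problem sizes; Y holds measured GFLOP/s per candidate profile.
    import numpy as np
    import scipy.stats
    from sklearn import linear_model, preprocessing

    def select_profiles(X, Y):
        # Normalize the size features, as model.py does with StandardScaler
        scaler = preprocessing.StandardScaler().fit(X)
        Xs = scaler.transform(X)
        # Baseline: the profile that is optimal most often across the whole dataset
        ref = np.argmax(np.bincount(np.argmax(Y, axis=1)))
        # First half trains the regressor, second half evaluates it
        cut = X.shape[0] // 2
        clf = linear_model.LinearRegression().fit(Xs[:cut], Y[:cut])
        # For each test problem, pick the profile with the highest predicted GFLOP/s
        labels = np.argmax(clf.predict(Xs[cut:]), axis=1)
        YTe = Y[cut:]
        speedups = YTe[np.arange(YTe.shape[0]), labels] / YTe[:, ref]
        optspeedups = YTe.max(axis=1) / YTe[:, ref]
        print("Geometric-mean speedup : %.3f (optimal : %.3f)"
              % (scipy.stats.gmean(speedups), scipy.stats.gmean(optspeedups)))
        return scaler, clf

    if __name__ == "__main__":
        rng = np.random.RandomState(0)
        X = rng.randint(64, 4000, size=(200, 3)).astype(float)  # (M, N, K) sizes
        # Each synthetic profile's GFLOP/s grows with size at its own rate, plus noise
        rates = rng.uniform(0.01, 0.1, size=(1, 8))
        Y = X.sum(axis=1, keepdims=True) * rates + rng.uniform(0.0, 50.0, size=(200, 8))
        select_profiles(X, Y)

As in model.py, speedups are ratios, so they are averaged with a geometric mean; an arithmetic mean would let a few large wins dominate the reported figure.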