Python/Autotune: Moved devices from config.ini to command line argument

This commit is contained in:
Philippe Tillet
2014-10-27 03:28:46 -04:00
parent 7780423fa1
commit ba50960b0f
5 changed files with 124 additions and 100 deletions

View File

@@ -1,28 +1,23 @@
viennacl-src-root = /home/philippe/Development/viennacl-dev/viennacl/ #~ viennacl-src-root = /home/philippe/Development/viennacl-dev/viennacl/
[vector-axpy] [vector-axpy]
devices = 0 precision = single
precision = single, double #~ size = 5000000
size = 5000000
[reduction] #~ [reduction]
devices = 0 #~ precision = single, double
precision = single, double #~ size = 5000000
size = 5000000 #~
#~ [matrix-axpy]
[matrix-axpy] #~ precision = single, double
devices = 0 #~ size = 2560, 2560
precision = single, double #~
size = 2560, 2560 #~ [row-wise-reduction]
#~ precision = single, double
[row-wise-reduction] #~ layout = N,T
devices = 0 #~ size = 2560, 2560
precision = single, double #~
layout = N,T #~ [matrix-product]
size = 2560, 2560 #~ precision = single, double
#~ layout = NN,NT,TN,TT
[matrix-product] #~ size = 1536, 1536, 1536
devices = 0
precision = single, double
layout = NN,NT,TN,TT
size = 1536, 1536, 1536

View File

@@ -1,18 +1,19 @@
from __future__ import division from __future__ import division
import argparse, itertools, os, sys import argparse, itertools, os, sys, json
import misc_tools, optimize import misc_tools, optimize
import pyopencl as cl import pyopencl as cl
import pyviennacl as vcl import pyviennacl as vcl
import pyatidlas as atd import pyatidlas as atd
import numpy as np
from configobj import ConfigObj from configobj import ConfigObj
from numpy import random from numpy import random
from dataset import generate_dataset from dataset import generate_dataset
from model import train_model from model import train_model
DATATYPES = { 'single' : vcl.float32, DATATYPES = { 'single' : vcl.float32,
'double' : vcl.float64 } 'double' : vcl.float64 }
@@ -36,34 +37,34 @@ TYPES = { 'vector-axpy': {'template':atd.VectorAxpyTemplate,
'perf-index': lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9, 'perf-index': lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9,
'perf-measure': 'GFLOP/s'} } 'perf-measure': 'GFLOP/s'} }
def do_tuning(config_fname, viennacl_root):
def do_tuning(config_fname, viennacl_root, device):
json_out = {}
config = ConfigObj(config_fname) config = ConfigObj(config_fname)
def map_to_list(T, x): def map_to_list(T, x):
return list(map(T, x if isinstance(x, list) else [x])) return list(map(T, x if isinstance(x, list) else [x]))
for operation in ['vector-axpy', 'matrix-axpy', 'reduction', 'row-wise-reduction', 'matrix-product']: for operation in ['vector-axpy', 'matrix-axpy', 'reduction', 'row-wise-reduction', 'matrix-product']:
if operation in config: if operation in config:
p = config[operation] p = config[operation]
confdevices = p['devices']
all_devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
DEVICES_PRESETS = {'all': all_devices,
'gpus': [d for d in all_devices if d.type==cl.device_type.GPU],
'cpus': [d for d in all_devices if d.type==cl.device_type.CPU],
'accelerators': [d for d in all_devices if d.type==cl.device_type.ACCELERATOR]
}
devices = DEVICES_PRESETS[confdevices] if confdevices in DEVICES_PRESETS else [all_devices[int(i)] for i in confdevices]
precisions = map_to_list(str, p['precision']) precisions = map_to_list(str, p['precision'])
if 'all' in precisions: if 'all' in precisions:
precisions = ['single','double'] precisions = ['single','double']
datatypes = [DATATYPES[k] for k in precisions] datatypes = [DATATYPES[k] for k in precisions]
#Iterate through the datatypes and the devices
for datatype, device in itertools.product(datatypes, devices): #Iterate through the datatypes
for datatype in datatypes:
ctx = cl.Context([device]) ctx = cl.Context([device])
ctx = vcl.backend.Context(ctx) ctx = vcl.backend.Context(ctx)
device = ctx.current_device
#Check data-type #Check data-type
if datatype is vcl.float64 and not device.double_fp_config: if datatype is vcl.float64 and not device.double_fp_config:
sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...') sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
continue continue
#Helper for execution #Helper for execution
def execute(device, node, other_params, sizes, fname = os.devnull, parameters = None): def execute(device, node, other_params, sizes, fname = os.devnull, parameters = None):
with vcl.Statement(node) as statement: with vcl.Statement(node) as statement:
@@ -75,6 +76,7 @@ def do_tuning(config_fname, viennacl_root):
with open(fname, "w+") as archive: with open(fname, "w+") as archive:
return optimize.genetic(statement, device, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params), return optimize.genetic(statement, device, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive) lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
#Helper for tuning #Helper for tuning
def tune(execution_handler, nTuning, nDataPoints, draw, additional_parameters): def tune(execution_handler, nTuning, nDataPoints, draw, additional_parameters):
if 'size' in p: if 'size' in p:
@@ -85,7 +87,20 @@ def do_tuning(config_fname, viennacl_root):
def compute_perf(x, t): def compute_perf(x, t):
return TYPES[operation]['perf-index']([datatype().itemsize, x, t]) return TYPES[operation]['perf-index']([datatype().itemsize, x, t])
X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler, nTuning, nDataPoints, draw) X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler, nTuning, nDataPoints, draw)
train_model(X, Y, profiles, TYPES[operation]['perf-measure']) clf = train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
#Update JSON
full_operation = operation + ''.join(additional_parameters)
if full_operation not in json_out:
json_out[full_operation] = {}
json_out[full_operation][datatype.__name__] = {}
D = json_out[full_operation][datatype.__name__]
D['profiles'] = [ prof.astype('int').tolist() for prof in profiles]
D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
'children_right': e.tree_.children_right.tolist(),
'threshold': e.tree_.threshold.astype('float32').tolist(),
'feature': e.tree_.feature.astype('float32').tolist(),
'value': e.tree_.value[:,:,0].astype('float32').tolist()} for e in clf.estimators_]
#Vector AXPY #Vector AXPY
if operation=='vector-axpy': if operation=='vector-axpy':
@@ -143,6 +158,10 @@ def do_tuning(config_fname, viennacl_root):
return execute(device, vcl.Assign(C,LHS*RHS*alpha + C*beta),(A_trans, B_trans), sizes, fname, parameters) return execute(device, vcl.Assign(C,LHS*RHS*alpha + C*beta),(A_trans, B_trans), sizes, fname, parameters)
tune(execution_handler, 50, 2000, lambda : 64*np.random.randint(low=1, high=40, size=3),(layout[0], layout[1])) tune(execution_handler, 50, 2000, lambda : 64*np.random.randint(low=1, high=40, size=3),(layout[0], layout[1]))
dname = misc_tools.sanitize_string(device.name)
json_out["version"] = "1.0"
json.dump(json_out, open(dname + '.json','w'))
if __name__ == "__main__": if __name__ == "__main__":
@@ -151,14 +170,15 @@ if __name__ == "__main__":
print_devices_parser = subparsers.add_parser('list-devices', help='list the devices available') print_devices_parser = subparsers.add_parser('list-devices', help='list the devices available')
tune_parser = subparsers.add_parser('tune', help='tune using a specific configuration file') tune_parser = subparsers.add_parser('tune', help='tune using a specific configuration file')
tune_parser.add_argument("--config", default="config.ini", required=False, type=str) tune_parser.add_argument("--config", default="config.ini", required=False, type=str)
tune_parser.add_argument("--device", default=0, required=False, type=str)
tune_parser.add_argument("--viennacl-root", default='', required=False, type=str) tune_parser.add_argument("--viennacl-root", default='', required=False, type=str)
args = parser.parse_args() args = parser.parse_args()
devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
if(args.action=='list-devices'): if(args.action=='list-devices'):
print("----------------") print("----------------")
print("Devices available:") print("Devices available:")
print("----------------") print("----------------")
devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
for (i, d) in enumerate(devices): for (i, d) in enumerate(devices):
print 'Device', i, '|', cl.device_type.to_string(d.type), '|', d.name, 'on', d.platform.name print 'Device', i, '|', cl.device_type.to_string(d.type), '|', d.name, 'on', d.platform.name
print("----------------") print("----------------")
@@ -166,4 +186,4 @@ if __name__ == "__main__":
print("------") print("------")
print("Auto-tuning") print("Auto-tuning")
print("------") print("------")
do_tuning(args.config, args.viennacl_root) do_tuning(args.config, args.viennacl_root, devices[args.device])

View File

@@ -15,48 +15,53 @@ def resample(X, draw):
def generate_dataset(TemplateType, execution_handler, nTuning, nDataPoints, draw): def generate_dataset(TemplateType, execution_handler, nTuning, nDataPoints, draw):
print "Getting some good profiles..." # print "Getting some good profiles..."
nDim = draw().size # nDim = draw().size
X = np.empty((nTuning, nDim)) # X = np.empty((nTuning, nDim))
t = np.empty(nTuning) # t = np.empty(nTuning)
profiles = [] # profiles = []
for i in range(nTuning): # for i in range(nTuning):
x = resample(X, draw) # x = resample(X, draw)
y = execution_handler(x) # y = execution_handler(x)
if y not in profiles: # if y not in profiles:
profiles.append(y) # profiles.append(y)
idx = profiles.index(y) # idx = profiles.index(y)
X[i,:] = x # X[i,:] = x
t[i] = idx # t[i] = idx
#
print "Generating the dataset..." # print "Generating the dataset..."
Y = np.empty((nDataPoints, len(profiles))) # Y = np.empty((nDataPoints, len(profiles)))
X = np.empty((nDataPoints, nDim)) # X = np.empty((nDataPoints, nDim))
t = [] # t = []
#
for i in range(nDataPoints): # for i in range(nDataPoints):
x = resample(X, draw) # x = resample(X, draw)
for j,y in enumerate(profiles): # for j,y in enumerate(profiles):
T = execution_handler(x, os.devnull, y) # T = execution_handler(x, os.devnull, y)
Y[i,j] = T # Y[i,j] = T
idx = np.argmax(Y[i,:]) # idx = np.argmax(Y[i,:])
X[i,:] = x # X[i,:] = x
t = np.argmax(Y[:i+1,], axis=1) # t = np.argmax(Y[:i+1,], axis=1)
if i%10==0: # if i%10==0:
sys.stdout.write('%d data points generated\r'%i) # sys.stdout.write('%d data points generated\r'%i)
sys.stdout.flush() # sys.stdout.flush()
template_name = TemplateType.__name__ template_name = TemplateType.__name__
dir = os.path.join("data", template_name) dir = os.path.join("data", template_name)
if not os.path.exists(dir): if not os.path.exists(dir):
os.makedirs(dir) os.makedirs(dir)
np.savetxt(os.path.join(dir,"profiles.csv"), profiles) # np.savetxt(os.path.join(dir,"profiles.csv"), profiles)
np.savetxt(os.path.join(dir,"X.csv"), X) # np.savetxt(os.path.join(dir,"X.csv"), X)
np.savetxt(os.path.join(dir,"Y.csv"), Y) # np.savetxt(os.path.join(dir,"Y.csv"), Y)
profiles = np.loadtxt(os.path.join(dir, "profiles.csv")) profiles = np.loadtxt(os.path.join(dir, "profiles.csv"))
X = np.loadtxt(os.path.join(dir, "X.csv"),ndmin=2) X = np.loadtxt(os.path.join(dir, "X.csv"),ndmin=2)
Y = np.loadtxt(os.path.join(dir, "Y.csv"),ndmin=2) Y = np.loadtxt(os.path.join(dir, "Y.csv"),ndmin=2)
#idx = np.argsort(np.bincount(np.argmin(Y, axis=1)))
idx = np.argsort(Y[np.argmax(X),:])
Y = Y[:, idx]
profiles = profiles[idx]
return X, Y, profiles return X, Y, profiles

View File

@@ -207,12 +207,12 @@ def benchmark(template, statement, device):
return current_time/N return current_time/N
def update_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters): def sanitize_string(string, keep_chars = ['_']):
string = string.replace(' ', '_').replace('-', '_').lower()
string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
return string
def sanitize_string(string, keep_chars = ['_']): def update_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters):
string = string.replace(' ', '_').replace('-', '_').lower()
string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
return string
def append_include(data, path): def append_include(data, path):
include_name = '#include "' + path +'"\n' include_name = '#include "' + path +'"\n'

View File

@@ -1,7 +1,6 @@
from sklearn import tree from sklearn import tree
from sklearn import ensemble from sklearn import ensemble
from numpy import array, bincount, mean, std, max, argmax, min, argmin, median import numpy as np
def gmean(a, axis=0, dtype=None): def gmean(a, axis=0, dtype=None):
if not isinstance(a, np.ndarray): # if not an ndarray object attempt to convert it if not isinstance(a, np.ndarray): # if not an ndarray object attempt to convert it
@@ -16,25 +15,30 @@ def gmean(a, axis=0, dtype=None):
return np.exp(log_a.mean(axis=axis)) return np.exp(log_a.mean(axis=axis))
def train_model(X, Y, profiles, metric): def train_model(X, Y, profiles, metric):
print("Building the model...") Y=Y[:,:]
profiles=profiles[:]
Xmean = mean(X) Ymax = np.max(Y)
Xstd = std(X)
X = (X - Xmean)/Xstd
Y = Y[:, :]
Ymax = max(Y)
Y = Y/Ymax Y = Y/Ymax
ref = argmax(bincount(argmin(Y, axis=1))) #most common profile
cut = int(0.800*X.shape[0]+1)
#Train the model #Train the model
clf = ensemble.RandomForestRegressor(10, max_depth=10).fit(X[:cut,:], Y[:cut,:]) cut = int(0.75*X.shape[0])
clf = ensemble.RandomForestRegressor(10, max_depth=4).fit(X[:cut,:], Y[:cut,:])
t = argmin(clf.predict(X[cut:,:]), axis = 1) print clf.predict([10000])
s = array([y[ref]/y[k] for y,k in zip(Y[cut:,:], t)])
tt = argmin(Y[cut:,:], axis = 1) t = np.argmin(clf.predict(X[cut:,:]), axis = 1)
ss = array([y[ref]/y[k] for y,k in zip(Y[cut:,:], tt)]) s = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], t)])
print("Testing speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%(gmean(s), median(s), min(s), max(s))) tt = np.argmin(Y[cut:,:], axis = 1)
print("Optimal speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%(gmean(ss), median(ss), min(ss), max(ss))) ss = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], tt)])
p5 = lambda a: np.percentile(a, 5)
p25 = lambda a: np.percentile(a, 25)
p50 = lambda a: np.percentile(a, 50)
p75 = lambda a: np.percentile(a, 75)
p95 = lambda a: np.percentile(a, 95)
print("Percentile :\t 5 \t 25 \t 50 \t 75 \t 95")
print("Testing speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(s), p25(s), p50(s), p75(s), p95(s)))
print("Optimal speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(ss), p25(ss), p50(ss), p75(ss), p95(ss)))
return clf