Python/Autotune: Moved devices from config.ini to command line argument

Philippe Tillet
2014-10-27 03:28:46 -04:00
parent 7780423fa1
commit ba50960b0f
5 changed files with 124 additions and 100 deletions
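With this change the target device is picked once on the command line instead of per-section in config.ini. Assuming the entry script shown in the second file is named autotune.py (its filename is not visible in this view), usage would look like:

    python autotune.py list-devices
    python autotune.py tune --config config.ini --device 1 --viennacl-root /path/to/viennacl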

View File: config.ini

@@ -1,28 +1,23 @@
-viennacl-src-root = /home/philippe/Development/viennacl-dev/viennacl/
+#~ viennacl-src-root = /home/philippe/Development/viennacl-dev/viennacl/
 
 [vector-axpy]
-devices = 0
-precision = single, double
-size = 5000000
+precision = single
+#~ size = 5000000
 
-[reduction]
-devices = 0
-precision = single, double
-size = 5000000
-
-[matrix-axpy]
-devices = 0
-precision = single, double
-size = 2560, 2560
-
-[row-wise-reduction]
-devices = 0
-precision = single, double
-layout = N,T
-size = 2560, 2560
-
-[matrix-product]
-devices = 0
-precision = single, double
-layout = NN,NT,TN,TT
-size = 1536, 1536, 1536
+#~ [reduction]
+#~ precision = single, double
+#~ size = 5000000
+#~
+#~ [matrix-axpy]
+#~ precision = single, double
+#~ size = 2560, 2560
+#~
+#~ [row-wise-reduction]
+#~ precision = single, double
+#~ layout = N,T
+#~ size = 2560, 2560
+#~
+#~ [matrix-product]
+#~ precision = single, double
+#~ layout = NN,NT,TN,TT
+#~ size = 1536, 1536, 1536
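With the devices keys gone, each remaining section only lists precision/size/layout, which ConfigObj hands back as a string (single value) or a list of strings (comma-separated). A minimal sketch of reading the trimmed file, assuming configobj is installed:

    # Minimal sketch: parse the new config.ini layout with ConfigObj.
    from configobj import ConfigObj

    config = ConfigObj('config.ini')

    def map_to_list(T, x):
        # ConfigObj yields 'single' as a plain string but
        # 'single, double' as ['single', 'double'].
        return list(map(T, x if isinstance(x, list) else [x]))

    if 'vector-axpy' in config:
        p = config['vector-axpy']
        print(map_to_list(str, p['precision']))   # -> ['single']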

View File: (autotuner entry script; filename not shown in this view)

@@ -1,18 +1,19 @@
 from __future__ import division
-import argparse, itertools, os, sys
+import argparse, itertools, os, sys, json
 import misc_tools, optimize
 import pyopencl as cl
 import pyviennacl as vcl
 import pyatidlas as atd
 import numpy as np
 from configobj import ConfigObj
 from numpy import random
 from dataset import generate_dataset
 from model import train_model
 
 DATATYPES = { 'single' : vcl.float32,
               'double' : vcl.float64 }
@@ -36,34 +37,34 @@ TYPES = { 'vector-axpy': {'template':atd.VectorAxpyTemplate,
                             'perf-index': lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9,
                             'perf-measure': 'GFLOP/s'} }
 
-def do_tuning(config_fname, viennacl_root):
+def do_tuning(config_fname, viennacl_root, device):
+    json_out = {}
     config = ConfigObj(config_fname)
 
     def map_to_list(T, x):
         return list(map(T, x if isinstance(x, list) else [x]))
 
     for operation in ['vector-axpy', 'matrix-axpy', 'reduction', 'row-wise-reduction', 'matrix-product']:
         if operation in config:
             p = config[operation]
-            confdevices = p['devices']
-            all_devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
-            DEVICES_PRESETS = {'all': all_devices,
-                               'gpus': [d for d in all_devices if d.type==cl.device_type.GPU],
-                               'cpus': [d for d in all_devices if d.type==cl.device_type.CPU],
-                               'accelerators': [d for d in all_devices if d.type==cl.device_type.ACCELERATOR]}
-            devices = DEVICES_PRESETS[confdevices] if confdevices in DEVICES_PRESETS else [all_devices[int(i)] for i in confdevices]
             precisions = map_to_list(str, p['precision'])
             if 'all' in precisions:
                 precisions = ['single','double']
             datatypes = [DATATYPES[k] for k in precisions]
-            #Iterate through the datatypes and the devices
-            for datatype, device in itertools.product(datatypes, devices):
+            #Iterate through the datatypes
+            for datatype in datatypes:
                 ctx = cl.Context([device])
                 ctx = vcl.backend.Context(ctx)
                 device = ctx.current_device
                 #Check data-type
                 if datatype is vcl.float64 and not device.double_fp_config:
                     sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
                     continue
                 #Helper for execution
                 def execute(device, node, other_params, sizes, fname = os.devnull, parameters = None):
                     with vcl.Statement(node) as statement:
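The hunk above deletes the in-config device presets ('all', 'gpus', 'cpus', 'accelerators') in favour of a single device object passed in by the caller. For reference, a sketch of resolving a flat device index with pyopencl and checking double support, mirroring what the remaining code does:

    # Sketch: resolve a flat device index (what --device now selects) and
    # check double-precision support via pyopencl.
    import pyopencl as cl

    devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
    device = devices[0]                  # index supplied by --device
    if not device.double_fp_config:      # 0 when CL_DEVICE_DOUBLE_FP_CONFIG is empty
        print('%s does not support double precision' % device.name)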
@@ -75,6 +76,7 @@ def do_tuning(config_fname, viennacl_root):
                         with open(fname, "w+") as archive:
                             return optimize.genetic(statement, device, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
                                                     lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
+
                 #Helper for tuning
                 def tune(execution_handler, nTuning, nDataPoints, draw, additional_parameters):
                     if 'size' in p:
@@ -85,7 +87,20 @@ def do_tuning(config_fname, viennacl_root):
                     def compute_perf(x, t):
                         return TYPES[operation]['perf-index']([datatype().itemsize, x, t])
                     X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler, nTuning, nDataPoints, draw)
-                    train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
+                    clf = train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
+                    #Update JSON
+                    full_operation = operation + ''.join(additional_parameters)
+                    if full_operation not in json_out:
+                        json_out[full_operation] = {}
+                    json_out[full_operation][datatype.__name__] = {}
+                    D = json_out[full_operation][datatype.__name__]
+                    D['profiles'] = [ prof.astype('int').tolist() for prof in profiles]
+                    D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
+                                       'children_right': e.tree_.children_right.tolist(),
+                                       'threshold': e.tree_.threshold.astype('float32').tolist(),
+                                       'feature': e.tree_.feature.astype('float32').tolist(),
+                                       'value': e.tree_.value[:,:,0].astype('float32').tolist()} for e in clf.estimators_]
 
                 #Vector AXPY
                 if operation=='vector-axpy':
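D['predictor'] flattens each tree of the random forest into sklearn's parallel arrays (children_left, children_right, threshold, feature, value). How a consumer might later walk one such serialized tree is sketched below; only the field names come from the diff, the traversal itself is an assumption about the consumer:

    # Sketch: evaluate one serialized tree on a feature vector x.
    # In sklearn's flat tree arrays, children_left[i] == -1 marks a leaf.
    def predict_tree(tree, x):
        i = 0
        while tree['children_left'][i] != -1:
            if x[int(tree['feature'][i])] <= tree['threshold'][i]:
                i = tree['children_left'][i]
            else:
                i = tree['children_right'][i]
        return tree['value'][i]   # one predicted (normalized) time per profile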
@@ -143,6 +158,10 @@ def do_tuning(config_fname, viennacl_root):
                         return execute(device, vcl.Assign(C,LHS*RHS*alpha + C*beta),(A_trans, B_trans), sizes, fname, parameters)
                     tune(execution_handler, 50, 2000, lambda : 64*np.random.randint(low=1, high=40, size=3),(layout[0], layout[1]))
 
+    dname = misc_tools.sanitize_string(device.name)
+    json_out["version"] = "1.0"
+    json.dump(json_out, open(dname + '.json','w'))
+
 if __name__ == "__main__":
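The dump lands in <sanitized-device-name>.json, with one entry per operation+layout and datatype. A sketch of reading it back (file name and keys below are illustrative; the key structure follows the code above):

    # Sketch: load a dump produced by do_tuning and pick one entry.
    import json

    with open('geforce_gtx_780.json') as f:    # dname + '.json'
        db = json.load(f)

    entry = db['matrix-productNN']['float64']  # operation + layout, datatype.__name__
    profiles = entry['profiles']               # integer parameter tuples
    forest = entry['predictor']                # one dict of flat arrays per tree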
@@ -151,14 +170,15 @@ if __name__ == "__main__":
     print_devices_parser = subparsers.add_parser('list-devices', help='list the devices available')
     tune_parser = subparsers.add_parser('tune', help='tune using a specific configuration file')
     tune_parser.add_argument("--config", default="config.ini", required=False, type=str)
+    tune_parser.add_argument("--device", default=0, required=False, type=int)
     tune_parser.add_argument("--viennacl-root", default='', required=False, type=str)
     args = parser.parse_args()
+    devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
 
     if(args.action=='list-devices'):
         print("----------------")
         print("Devices available:")
         print("----------------")
-        devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
         for (i, d) in enumerate(devices):
             print 'Device', i, '|', cl.device_type.to_string(d.type), '|', d.name, 'on', d.platform.name
         print("----------------")
@@ -166,4 +186,4 @@ if __name__ == "__main__":
print("------")
print("Auto-tuning")
print("------")
do_tuning(args.config, args.viennacl_root)
do_tuning(args.config, args.viennacl_root, devices[args.device])

View File: dataset.py

@@ -15,48 +15,53 @@ def resample(X, draw):
 def generate_dataset(TemplateType, execution_handler, nTuning, nDataPoints, draw):
-    print "Getting some good profiles..."
-    nDim = draw().size
-    X = np.empty((nTuning, nDim))
-    t = np.empty(nTuning)
-    profiles = []
-    for i in range(nTuning):
-        x = resample(X, draw)
-        y = execution_handler(x)
-        if y not in profiles:
-            profiles.append(y)
-        idx = profiles.index(y)
-        X[i,:] = x
-        t[i] = idx
-
-    print "Generating the dataset..."
-    Y = np.empty((nDataPoints, len(profiles)))
-    X = np.empty((nDataPoints, nDim))
-    t = []
-
-    for i in range(nDataPoints):
-        x = resample(X, draw)
-        for j,y in enumerate(profiles):
-            T = execution_handler(x, os.devnull, y)
-            Y[i,j] = T
-        idx = np.argmax(Y[i,:])
-        X[i,:] = x
-        t = np.argmax(Y[:i+1,], axis=1)
-        if i%10==0:
-            sys.stdout.write('%d data points generated\r'%i)
-            sys.stdout.flush()
+    # print "Getting some good profiles..."
+    # nDim = draw().size
+    # X = np.empty((nTuning, nDim))
+    # t = np.empty(nTuning)
+    # profiles = []
+    # for i in range(nTuning):
+    #     x = resample(X, draw)
+    #     y = execution_handler(x)
+    #     if y not in profiles:
+    #         profiles.append(y)
+    #     idx = profiles.index(y)
+    #     X[i,:] = x
+    #     t[i] = idx
+    #
+    # print "Generating the dataset..."
+    # Y = np.empty((nDataPoints, len(profiles)))
+    # X = np.empty((nDataPoints, nDim))
+    # t = []
+    #
+    # for i in range(nDataPoints):
+    #     x = resample(X, draw)
+    #     for j,y in enumerate(profiles):
+    #         T = execution_handler(x, os.devnull, y)
+    #         Y[i,j] = T
+    #     idx = np.argmax(Y[i,:])
+    #     X[i,:] = x
+    #     t = np.argmax(Y[:i+1,], axis=1)
+    #     if i%10==0:
+    #         sys.stdout.write('%d data points generated\r'%i)
+    #         sys.stdout.flush()
 
     template_name = TemplateType.__name__
     dir = os.path.join("data", template_name)
     if not os.path.exists(dir):
         os.makedirs(dir)
-    np.savetxt(os.path.join(dir,"profiles.csv"), profiles)
-    np.savetxt(os.path.join(dir,"X.csv"), X)
-    np.savetxt(os.path.join(dir,"Y.csv"), Y)
+    # np.savetxt(os.path.join(dir,"profiles.csv"), profiles)
+    # np.savetxt(os.path.join(dir,"X.csv"), X)
+    # np.savetxt(os.path.join(dir,"Y.csv"), Y)
+    profiles = np.loadtxt(os.path.join(dir, "profiles.csv"))
+    X = np.loadtxt(os.path.join(dir, "X.csv"),ndmin=2)
+    Y = np.loadtxt(os.path.join(dir, "Y.csv"),ndmin=2)
+
+    #idx = np.argsort(np.bincount(np.argmin(Y, axis=1)))
+    idx = np.argsort(Y[np.argmax(X),:])
+    Y = Y[:, idx]
+    profiles = profiles[idx]
 
     return X, Y, profiles
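After this change generate_dataset no longer benchmarks; it reloads previously cached CSVs and reorders the profile columns. The load-and-reorder step in isolation (the template directory name is illustrative):

    # Sketch: the cached-load path on its own.
    import os
    import numpy as np

    dir = os.path.join('data', 'MatrixProductTemplate')
    profiles = np.loadtxt(os.path.join(dir, 'profiles.csv'))
    X = np.loadtxt(os.path.join(dir, 'X.csv'), ndmin=2)  # ndmin=2 keeps single-row files 2-D
    Y = np.loadtxt(os.path.join(dir, 'Y.csv'), ndmin=2)

    # np.argmax on 2-D X returns an index into the flattened array; with a
    # single feature column this is the row of the largest size, so profile
    # columns end up sorted by their measured time on the biggest problem.
    idx = np.argsort(Y[np.argmax(X), :])
    Y = Y[:, idx]
    profiles = profiles[idx]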

View File: misc_tools.py

@@ -207,12 +207,12 @@ def benchmark(template, statement, device):
     return current_time/N
 
-def update_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters):
-    def sanitize_string(string, keep_chars = ['_']):
-        string = string.replace(' ', '_').replace('-', '_').lower()
-        string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
-        return string
+def sanitize_string(string, keep_chars = ['_']):
+    string = string.replace(' ', '_').replace('-', '_').lower()
+    string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
+    return string
+
+def update_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters):
     def append_include(data, path):
         include_name = '#include "' + path +'"\n'
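Hoisting sanitize_string out of update_viennacl_headers to module level is what lets the tuner call misc_tools.sanitize_string(device.name) when naming its JSON dump. For example:

    # sanitize_string turns a device name into a safe file-name stem.
    def sanitize_string(string, keep_chars = ['_']):
        string = string.replace(' ', '_').replace('-', '_').lower()
        string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
        return string

    print(sanitize_string('GeForce GTX 780-Ti'))   # -> geforce_gtx_780_ti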

View File: model.py

@@ -1,7 +1,6 @@
 from sklearn import tree
 from sklearn import ensemble
-from numpy import array, bincount, mean, std, max, argmax, min, argmin, median
 import numpy as np
 
 def gmean(a, axis=0, dtype=None):
     if not isinstance(a, np.ndarray): # if not an ndarray object attempt to convert it
@@ -16,25 +15,30 @@ def gmean(a, axis=0, dtype=None):
     return np.exp(log_a.mean(axis=axis))
 
 def train_model(X, Y, profiles, metric):
     print("Building the model...")
-    Xmean = mean(X)
-    Xstd = std(X)
-    X = (X - Xmean)/Xstd
-    Y = Y[:, :]
-    Ymax = max(Y)
+    Y=Y[:,:]
+    profiles=profiles[:]
+    Ymax = np.max(Y)
     Y = Y/Ymax
-    ref = argmax(bincount(argmin(Y, axis=1))) #most common profile
-    cut = int(0.800*X.shape[0]+1)
-    #Train the model
-    clf = ensemble.RandomForestRegressor(10, max_depth=10).fit(X[:cut,:], Y[:cut,:])
-    t = argmin(clf.predict(X[cut:,:]), axis = 1)
-    s = array([y[ref]/y[k] for y,k in zip(Y[cut:,:], t)])
-    tt = argmin(Y[cut:,:], axis = 1)
-    ss = array([y[ref]/y[k] for y,k in zip(Y[cut:,:], tt)])
-    print("Testing speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%(gmean(s), median(s), min(s), max(s)))
-    print("Optimal speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%(gmean(ss), median(ss), min(ss), max(ss)))
-    print clf.predict([10000])
+    cut = int(0.75*X.shape[0])
+    clf = ensemble.RandomForestRegressor(10, max_depth=4).fit(X[:cut,:], Y[:cut,:])
+    t = np.argmin(clf.predict(X[cut:,:]), axis = 1)
+    s = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], t)])
+    tt = np.argmin(Y[cut:,:], axis = 1)
+    ss = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], tt)])
+    p5 = lambda a: np.percentile(a, 5)
+    p25 = lambda a: np.percentile(a, 25)
+    p50 = lambda a: np.percentile(a, 50)
+    p75 = lambda a: np.percentile(a, 75)
+    p95 = lambda a: np.percentile(a, 95)
+    print("Percentile :\t 5 \t 25 \t 50 \t 75 \t 95")
+    print("Testing speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(s), p25(s), p50(s), p75(s), p95(s)))
+    print("Optimal speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(ss), p25(ss), p50(ss), p75(ss), p95(ss)))
+    return clf
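The new report prints 5th/25th/50th/75th/95th percentiles of the speedup over profile 0 on a 25% holdout, instead of mean/median speedups over the most common profile. A self-contained sketch of the same train/holdout pattern on synthetic data (shapes and values are made up):

    # Sketch: the holdout evaluation used above, on random data.
    import numpy as np
    from sklearn import ensemble

    X = np.random.rand(200, 1)            # problem sizes (features)
    Y = np.random.rand(200, 4) + 0.1      # normalized times, one column per profile

    cut = int(0.75 * X.shape[0])
    clf = ensemble.RandomForestRegressor(n_estimators=10, max_depth=4).fit(X[:cut, :], Y[:cut, :])

    t = np.argmin(clf.predict(X[cut:, :]), axis=1)              # profile chosen by the model
    s = np.array([y[0] / y[k] for y, k in zip(Y[cut:, :], t)])  # speedup vs. profile 0
    print('median speedup: %.2f' % np.percentile(s, 50))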