Python/Autotune: Moved devices from config.ini to command line argument
python/autotune/external/config.ini (vendored), 45 lines changed
@@ -1,28 +1,23 @@
-viennacl-src-root = /home/philippe/Development/viennacl-dev/viennacl/
+#~ viennacl-src-root = /home/philippe/Development/viennacl-dev/viennacl/
 
 [vector-axpy]
-devices = 0
-precision = single, double
-size = 5000000
+precision = single
+#~ size = 5000000
 
-[reduction]
-devices = 0
-precision = single, double
-size = 5000000
-
-[matrix-axpy]
-devices = 0
-precision = single, double
-size = 2560, 2560
-
-[row-wise-reduction]
-devices = 0
-precision = single, double
-layout = N,T
-size = 2560, 2560
-
-[matrix-product]
-devices = 0
-precision = single, double
-layout = NN,NT,TN,TT
-size = 1536, 1536, 1536
+#~ [reduction]
+#~ precision = single, double
+#~ size = 5000000
+#~
+#~ [matrix-axpy]
+#~ precision = single, double
+#~ size = 2560, 2560
+#~
+#~ [row-wise-reduction]
+#~ precision = single, double
+#~ layout = N,T
+#~ size = 2560, 2560
+#~
+#~ [matrix-product]
+#~ precision = single, double
+#~ layout = NN,NT,TN,TT
+#~ size = 1536, 1536, 1536
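Note: ConfigObj parses a comma-separated value such as "precision = single, double" into a Python list, but a single value such as "precision = single" into a bare string. That asymmetry is why the tuner normalizes everything through map_to_list (shown verbatim in the next hunk); a minimal sketch against the file above:

import configobj  # from the configobj package, imported by the tuner

config = configobj.ConfigObj('config.ini')
p = config['vector-axpy']

def map_to_list(T, x):
    # Wrap scalars into a one-element list, then cast every element.
    return list(map(T, x if isinstance(x, list) else [x]))

precisions = map_to_list(str, p['precision'])  # always a list, e.g. ['single']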
@@ -1,18 +1,19 @@
 from __future__ import division
 
-import argparse, itertools, os, sys
+import argparse, itertools, os, sys, json
 import misc_tools, optimize
 
 import pyopencl as cl
 import pyviennacl as vcl
 import pyatidlas as atd
 
 import numpy as np
 
 from configobj import ConfigObj
 from numpy import random
 from dataset import generate_dataset
 from model import train_model
 
 
 DATATYPES = { 'single' : vcl.float32,
               'double' : vcl.float64 }
@@ -36,34 +37,34 @@ TYPES = { 'vector-axpy': {'template':atd.VectorAxpyTemplate,
           'perf-index': lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9,
           'perf-measure': 'GFLOP/s'} }
 
-def do_tuning(config_fname, viennacl_root):
+def do_tuning(config_fname, viennacl_root, device):
+    json_out = {}
     config = ConfigObj(config_fname)
 
     def map_to_list(T, x):
         return list(map(T, x if isinstance(x, list) else [x]))
 
     for operation in ['vector-axpy', 'matrix-axpy', 'reduction', 'row-wise-reduction', 'matrix-product']:
 
         if operation in config:
             p = config[operation]
-            confdevices = p['devices']
-            all_devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
-            DEVICES_PRESETS = {'all': all_devices,
-                               'gpus': [d for d in all_devices if d.type==cl.device_type.GPU],
-                               'cpus': [d for d in all_devices if d.type==cl.device_type.CPU],
-                               'accelerators': [d for d in all_devices if d.type==cl.device_type.ACCELERATOR]
-                               }
-            devices = DEVICES_PRESETS[confdevices] if confdevices in DEVICES_PRESETS else [all_devices[int(i)] for i in confdevices]
             precisions = map_to_list(str, p['precision'])
             if 'all' in precisions:
                 precisions = ['single','double']
             datatypes = [DATATYPES[k] for k in precisions]
-            #Iterate through the datatypes and the devices
-            for datatype, device in itertools.product(datatypes, devices):
+            #Iterate through the datatypes
+            for datatype in datatypes:
 
                 ctx = cl.Context([device])
                 ctx = vcl.backend.Context(ctx)
                 device = ctx.current_device
 
                 #Check data-type
                 if datatype is vcl.float64 and not device.double_fp_config:
                     sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
                     continue
 
                 #Helper for execution
                 def execute(device, node, other_params, sizes, fname = os.devnull, parameters = None):
                     with vcl.Statement(node) as statement:
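Note: the removed DEVICES_PRESETS logic can still be reproduced ad hoc when scripting outside the tuner. A minimal pyopencl sketch applying the same filters:

import pyopencl as cl

all_devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
# The same categories the old config-driven presets offered:
gpus = [d for d in all_devices if d.type == cl.device_type.GPU]
cpus = [d for d in all_devices if d.type == cl.device_type.CPU]
for i, d in enumerate(all_devices):
    print('Device %d | %s | %s' % (i, cl.device_type.to_string(d.type), d.name))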
@@ -75,6 +76,7 @@ def do_tuning(config_fname, viennacl_root):
                     with open(fname, "w+") as archive:
                         return optimize.genetic(statement, device, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
                                                 lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
 
                 #Helper for tuning
                 def tune(execution_handler, nTuning, nDataPoints, draw, additional_parameters):
                     if 'size' in p:
@@ -85,7 +87,20 @@ def do_tuning(config_fname, viennacl_root):
                     def compute_perf(x, t):
                         return TYPES[operation]['perf-index']([datatype().itemsize, x, t])
                     X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler, nTuning, nDataPoints, draw)
-                    train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
+                    clf = train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
+
+                    #Update JSON
+                    full_operation = operation + ''.join(additional_parameters)
+                    if full_operation not in json_out:
+                        json_out[full_operation] = {}
+                    json_out[full_operation][datatype.__name__] = {}
+                    D = json_out[full_operation][datatype.__name__]
+                    D['profiles'] = [ prof.astype('int').tolist() for prof in profiles]
+                    D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
+                                       'children_right': e.tree_.children_right.tolist(),
+                                       'threshold': e.tree_.threshold.astype('float32').tolist(),
+                                       'feature': e.tree_.feature.astype('float32').tolist(),
+                                       'value': e.tree_.value[:,:,0].astype('float32').tolist()} for e in clf.estimators_]
 
                 #Vector AXPY
                 if operation=='vector-axpy':
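Note: each serialized estimator above is a flattened sklearn decision tree (children_left, children_right, threshold, feature, value), so a consumer can evaluate the model without sklearn. A hedged sketch; the predict_tree helper, the file name, and the 'vector-axpy'/'float32' keys are illustrative assumptions, not part of this commit:

import json

def predict_tree(tree, x):
    # Walk one serialized tree; sklearn marks leaves with children_left == -1.
    node = 0
    while tree['children_left'][node] != -1:
        f = int(tree['feature'][node])
        if x[f] <= tree['threshold'][node]:
            node = tree['children_left'][node]
        else:
            node = tree['children_right'][node]
    return tree['value'][node]  # per-profile scores stored at the leaf

model = json.load(open('geforce_gtx_470.json'))  # name produced by sanitize_string(device.name)
trees = model['vector-axpy']['float32']['predictor']
preds = [predict_tree(t, [10000.0]) for t in trees]
# Random-forest prediction = average over the estimators.
avg = [sum(vals) / len(preds) for vals in zip(*preds)]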
@@ -143,6 +158,10 @@ def do_tuning(config_fname, viennacl_root):
                         return execute(device, vcl.Assign(C,LHS*RHS*alpha + C*beta),(A_trans, B_trans), sizes, fname, parameters)
                     tune(execution_handler, 50, 2000, lambda : 64*np.random.randint(low=1, high=40, size=3),(layout[0], layout[1]))
 
+    dname = misc_tools.sanitize_string(device.name)
+    json_out["version"] = "1.0"
+    json.dump(json_out, open(dname + '.json','w'))
 
 
 if __name__ == "__main__":
@@ -151,14 +170,15 @@ if __name__ == "__main__":
     print_devices_parser = subparsers.add_parser('list-devices', help='list the devices available')
     tune_parser = subparsers.add_parser('tune', help='tune using a specific configuration file')
     tune_parser.add_argument("--config", default="config.ini", required=False, type=str)
+    tune_parser.add_argument("--device", default=0, required=False, type=int)
     tune_parser.add_argument("--viennacl-root", default='', required=False, type=str)
     args = parser.parse_args()
 
+    devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
     if(args.action=='list-devices'):
         print("----------------")
         print("Devices available:")
         print("----------------")
-        devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
         for (i, d) in enumerate(devices):
             print 'Device', i, '|', cl.device_type.to_string(d.type), '|', d.name, 'on', d.platform.name
         print("----------------")
@@ -166,4 +186,4 @@ if __name__ == "__main__":
         print("------")
         print("Auto-tuning")
         print("------")
-        do_tuning(args.config, args.viennacl_root)
+        do_tuning(args.config, args.viennacl_root, devices[args.device])
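Note: after this change the device is picked on the command line rather than in config.ini, so a typical session first enumerates devices and then tunes on one of them. The entry-point script name below is an assumption; it was not preserved in this view:

python autotune.py list-devices
python autotune.py tune --config config.ini --device 1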
dataset.py

@@ -15,48 +15,53 @@ def resample(X, draw):
 
 def generate_dataset(TemplateType, execution_handler, nTuning, nDataPoints, draw):
 
-    print "Getting some good profiles..."
-    nDim = draw().size
-    X = np.empty((nTuning, nDim))
-    t = np.empty(nTuning)
-    profiles = []
-    for i in range(nTuning):
-        x = resample(X, draw)
-        y = execution_handler(x)
-        if y not in profiles:
-            profiles.append(y)
-        idx = profiles.index(y)
-        X[i,:] = x
-        t[i] = idx
-
-    print "Generating the dataset..."
-    Y = np.empty((nDataPoints, len(profiles)))
-    X = np.empty((nDataPoints, nDim))
-    t = []
-
-    for i in range(nDataPoints):
-        x = resample(X, draw)
-        for j,y in enumerate(profiles):
-            T = execution_handler(x, os.devnull, y)
-            Y[i,j] = T
-        idx = np.argmax(Y[i,:])
-        X[i,:] = x
-        t = np.argmax(Y[:i+1,], axis=1)
-        if i%10==0:
-            sys.stdout.write('%d data points generated\r'%i)
-            sys.stdout.flush()
+    # print "Getting some good profiles..."
+    # nDim = draw().size
+    # X = np.empty((nTuning, nDim))
+    # t = np.empty(nTuning)
+    # profiles = []
+    # for i in range(nTuning):
+    #     x = resample(X, draw)
+    #     y = execution_handler(x)
+    #     if y not in profiles:
+    #         profiles.append(y)
+    #     idx = profiles.index(y)
+    #     X[i,:] = x
+    #     t[i] = idx
+    #
+    # print "Generating the dataset..."
+    # Y = np.empty((nDataPoints, len(profiles)))
+    # X = np.empty((nDataPoints, nDim))
+    # t = []
+    #
+    # for i in range(nDataPoints):
+    #     x = resample(X, draw)
+    #     for j,y in enumerate(profiles):
+    #         T = execution_handler(x, os.devnull, y)
+    #         Y[i,j] = T
+    #     idx = np.argmax(Y[i,:])
+    #     X[i,:] = x
+    #     t = np.argmax(Y[:i+1,], axis=1)
+    #     if i%10==0:
+    #         sys.stdout.write('%d data points generated\r'%i)
+    #         sys.stdout.flush()
 
     template_name = TemplateType.__name__
     dir = os.path.join("data", template_name)
     if not os.path.exists(dir):
         os.makedirs(dir)
 
-    np.savetxt(os.path.join(dir,"profiles.csv"), profiles)
-    np.savetxt(os.path.join(dir,"X.csv"), X)
-    np.savetxt(os.path.join(dir,"Y.csv"), Y)
+    # np.savetxt(os.path.join(dir,"profiles.csv"), profiles)
+    # np.savetxt(os.path.join(dir,"X.csv"), X)
+    # np.savetxt(os.path.join(dir,"Y.csv"), Y)
 
+    profiles = np.loadtxt(os.path.join(dir, "profiles.csv"))
+    X = np.loadtxt(os.path.join(dir, "X.csv"),ndmin=2)
+    Y = np.loadtxt(os.path.join(dir, "Y.csv"),ndmin=2)
+
+    #idx = np.argsort(np.bincount(np.argmin(Y, axis=1)))
+    idx = np.argsort(Y[np.argmax(X),:])
+    Y = Y[:, idx]
+    profiles = profiles[idx]
 
     return X, Y, profiles
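Note: the new code path reloads cached CSVs (ndmin=2 keeps a single-row file two-dimensional) and reorders the profile columns by their measured time at one reference input, keeping the labels in sync. A toy numpy illustration of that column reordering, with invented numbers:

import numpy as np

Y = np.array([[3.0, 1.0, 2.0],    # times for 3 profiles at input 0
              [6.0, 2.0, 4.0]])   # times at input 1
profiles = np.array([100, 200, 300])

row = 1                           # reference row, as picked by np.argmax(X) above
idx = np.argsort(Y[row, :])       # fastest profile at that input first: [1, 2, 0]
Y = Y[:, idx]
profiles = profiles[idx]
print(profiles)                   # [200 300 100]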
misc_tools.py

@@ -207,13 +207,13 @@ def benchmark(template, statement, device):
     return current_time/N
 
 
+def sanitize_string(string, keep_chars = ['_']):
+    string = string.replace(' ', '_').replace('-', '_').lower()
+    string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
+    return string
+
 def update_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters):
 
     def append_include(data, path):
         include_name = '#include "' + path +'"\n'
         already_included = data.find(include_name)
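Note: sanitize_string lowercases the device name, maps spaces and hyphens to underscores, and drops any remaining non-alphanumeric characters, which gives each device a stable JSON file name. The device names below are illustrative:

>>> sanitize_string('GeForce GTX 470')
'geforce_gtx_470'
>>> sanitize_string('Intel(R) Core(TM) i7-4770')
'intelr_coretm_i7_4770'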
model.py

@@ -1,7 +1,6 @@
 from sklearn import tree
 from sklearn import ensemble
-from numpy import array, bincount, mean, std, max, argmax, min, argmin, median
 
 import numpy as np
 
 def gmean(a, axis=0, dtype=None):
     if not isinstance(a, np.ndarray): # if not an ndarray object attempt to convert it
@@ -16,25 +15,30 @@ def gmean(a, axis=0, dtype=None):
     return np.exp(log_a.mean(axis=axis))
 
 def train_model(X, Y, profiles, metric):
     print("Building the model...")
 
     Xmean = mean(X)
     Xstd = std(X)
     X = (X - Xmean)/Xstd
 
-    Y=Y[:,:]
-    Ymax = max(Y)
-    profiles=profiles[:]
+    Ymax = np.max(Y)
     Y = Y/Ymax
 
-    ref = argmax(bincount(argmin(Y, axis=1))) #most common profile
-    cut = int(0.800*X.shape[0]+1)
-
-    #Train the model
-    clf = ensemble.RandomForestRegressor(10, max_depth=10).fit(X[:cut,:], Y[:cut,:])
+    cut = int(0.75*X.shape[0])
+    clf = ensemble.RandomForestRegressor(10, max_depth=4).fit(X[:cut,:], Y[:cut,:])
 
-    t = argmin(clf.predict(X[cut:,:]), axis = 1)
-    s = array([y[ref]/y[k] for y,k in zip(Y[cut:,:], t)])
-    tt = argmin(Y[cut:,:], axis = 1)
-    ss = array([y[ref]/y[k] for y,k in zip(Y[cut:,:], tt)])
-    print("Testing speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%(gmean(s), median(s), min(s), max(s)))
-    print("Optimal speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%(gmean(ss), median(ss), min(ss), max(ss)))
-    print clf.predict([10000])
+    t = np.argmin(clf.predict(X[cut:,:]), axis = 1)
+    s = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], t)])
+    tt = np.argmin(Y[cut:,:], axis = 1)
+    ss = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], tt)])
+
+    p5 = lambda a: np.percentile(a, 5)
+    p25 = lambda a: np.percentile(a, 25)
+    p50 = lambda a: np.percentile(a, 50)
+    p75 = lambda a: np.percentile(a, 75)
+    p95 = lambda a: np.percentile(a, 95)
+
+    print("Percentile :\t 5 \t 25 \t 50 \t 75 \t 95")
+    print("Testing speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(s), p25(s), p50(s), p75(s), p95(s)))
+    print("Optimal speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(ss), p25(ss), p50(ss), p75(ss), p95(ss)))
 
     return clf
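Note: in the evaluation block, s measures the speedup of the model's chosen profile over profile 0, and ss the speedup of the true per-row optimum, so ss dominates s elementwise. A toy check of that arithmetic, with invented numbers:

import numpy as np

Y = np.array([[1.0, 0.5, 0.8],
              [1.0, 0.9, 0.4]])     # normalized times; column 0 is profile 0
t  = np.array([1, 1])               # profiles the model picks
tt = np.argmin(Y, axis=1)           # true best profiles per row: [1, 2]

s  = np.array([y[0]/y[k] for y, k in zip(Y, t)])   # [2.0, 1.111...]
ss = np.array([y[0]/y[k] for y, k in zip(Y, tt)])  # [2.0, 2.5]
assert np.all(ss >= s)              # the oracle is at least as good as the model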