Files
triton/python/isaac/autotuning/tune.py

143 lines
5.6 KiB
Python

import random, argparse, json, os
from math import log, isinf
from itertools import chain, product
from numpy import argsort, argmax
from operator import mul
import isaac as sc
from external.sklearn.forest import RandomForestRegressor
import optimize, tools, model
# Compact JSON output: render floats with two decimal places and (attempt to)
# drop the spaces json inserts after separators.
# NOTE(review): patching encoder.FLOAT_REPR only affects the pure-Python
# encoder of the Python 2-era json module; confirm it still has an effect on
# the interpreter in use. `encoder.separators` is presumably meant to reach
# json.dump's `separators=` argument — verify it is actually consulted.
from json import encoder
import json
encoder.FLOAT_REPR = lambda o: format(o, '.2f')
encoder.separators = (',',':')
def unique(L):
    """Return the elements of L in first-seen order, with duplicates removed.

    Elements must be hashable; the input may be any iterable.
    """
    seen = set()
    kept = []
    for item in L:
        if item in seen:
            continue
        seen.add(item)
        kept.append(item)
    return kept
def pow2range(a, b):
    """Return the powers of two [2**a, 2**(a+1), ..., 2**(b-1)].

    The upper bound is exclusive, mirroring range(); an empty list is
    returned when b <= a.
    """
    powers = []
    value = 2 ** a
    for _ in range(a, b):
        powers.append(value)
        value *= 2
    return powers
def tune(device, operation, json_path):
    """Auto-tune one isaac template on a device and export the result as JSON.

    Iterates over a set of problem sizes for `operation`; for each size it
    predicts a good kernel profile from a random forest fitted on the sizes
    benchmarked so far, runs a genetic search seeded with that prediction,
    and records the measured performance of every known profile.  Finally it
    dumps the profiles (and a serialized predictor, when there is more than
    one profile) to `json_path`.

    device    -- isaac device; a driver context is created from it
    operation -- one of the sc.templates classes used as keys in `sizes`
    json_path -- output file path; when falsy, '<sanitized device name>.json'

    Side effects: benchmarks kernels on the device and writes `json_path`.
    """
    #Context
    context = sc.driver.context(device)
    #List of size tuples to use
    sizes = {}
    sizes[sc.templates.axpy] = [(x,) for x in tools.expspace(1e3, 1e8, 4)]
    sizes[sc.templates.gemv_n] = product(pow2range(4,17), pow2range(4,17))
    sizes[sc.templates.gemv_t] = sizes[sc.templates.gemv_n]
    sizes[sc.templates.gemm_nn] = product(pow2range(6, 12), pow2range(6, 12), pow2range(6, 12))
    sizes[sc.templates.gemm_tn] = sizes[sc.templates.gemm_nn]
    sizes[sc.templates.gemm_nt] = sizes[sc.templates.gemm_nn]
    sizes[sc.templates.gemm_tt] = sizes[sc.templates.gemm_nn]
    #Quick tuning - AlexNet sizes + Intuition
    # NOTE(review): the assignments below overwrite the exhaustive pow2 grids
    # above for every operation except axpy and gemm_tt, restricting tuning to
    # a small set of hand-picked (AlexNet-like) shapes.
    sizes[sc.templates.ger] = [(1536,1536)]
    sizes[sc.templates.gemv_n] = [(1000,256),
                                  (4096,256)]
    sizes[sc.templates.gemv_t] = [(169,256),
                                  (169,384),
                                  (729,256),
                                  (3025,96)]
    sizes[sc.templates.gemm_nn] = [(3025,96,363),
                                   (729,128,1200),
                                   (169,384,2304),
                                   (169,192,1728),
                                   (169,128,1728)]
    sizes[sc.templates.gemm_nt] = [(169,1728,128),
                                   (169,1728,192),
                                   (169,2304,384),
                                   (729,1200,128)]
    sizes[sc.templates.gemm_tn] = [(1728,128,169),
                                   (1728,192,169),
                                   (2304,384,169),
                                   (1200,128,729),
                                   (363,96,3025)]
    #Remove duplicates and sizes whose memory footprint is out of range
    sizes = unique(list(sizes[operation]))
    sizes = [x for x in sizes if 1e-4 <= tools.memory_footprint(operation, x) <= 1e-1]
    #Training data
    performance = tools.metric_of(operation)  # called below as performance(size, time)
    profiles = []   # distinct kernel profiles found so far
    X = []          # sizes benchmarked so far (one row per size)
    Y = []          # Y[i][j] = performance of profiles[j] on X[i] (0 if unsupported/inf)
    for idx, x in enumerate(sizes):
        print x
        nparams = len(profiles)
        tree, operands = tools.tree_of(operation, x, context)
        #Check if the current best prediction is not a local optimum
        if idx==0:
            # No data yet: always run the full search with no prior
            tune = True   # NOTE(review): shadows the function name in this scope
            predicted = None
        else:
            if nparams==1:
                predicted = profiles[0]
            else:
                # Fit a forest on the data gathered so far, then benchmark the
                # 5 profiles with the highest predicted performance and keep
                # the best measured one as the prior for the search
                clf = RandomForestRegressor(min(10, idx+1), max_depth=min(10, idx+1)).fit(X, Y)
                #clf, nrmse = model.train(X, Y, profiles)
                predperf = clf.predict(x)[0]
                best = (-predperf).argsort()[:5]
                perf = [performance(x, tools.benchmark(operation, profiles[b], tree)) for b in best]
                predicted = profiles[best[argmax(perf)]]
            #tune = not optimize.is_local_optimum(predicted, operation, x, context)
            tune = True   # local-optimum early-exit disabled: always retune
        #Retune if necessary
        if tune:
            #new = optimize.exhaustive(operation, x, context)
            new = optimize.genetic(operation, x, context, niter=1000, naccept=1000, popsize=20, prior=predicted)[0]
            if new not in profiles:
                profiles.append(new)
                if idx > 0:
                    # Backfill: measure the new profile on every previously
                    # seen size so existing rows of Y stay rectangular
                    for xx,yy in zip(X, Y):
                        _tree, _operands = tools.tree_of(operation, xx, context)
                        try:
                            time = tools.benchmark(operation, new, _tree)
                            perf = performance(xx, time)
                        except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
                            perf = 0
                        yy.append(0 if isinf(perf) else perf)
        #Update dataset
        y = []
        # predperf exists only when the nparams > 1 branch above ran this
        # iteration; `fastest` gates the prediction-based skip below
        fastest = max(predperf) if nparams > 1 else None
        for ip, p in enumerate(profiles):
            try:
                # Skip benchmarking profiles predicted to reach <10% of the
                # best predicted performance; record them as 0
                perf = 0 if fastest and ip < nparams and predperf[ip]/fastest < .1 else performance(x,tools.benchmark(operation, p, tree))
            except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
                perf = 0
            y.append(0 if isinf(perf) else perf)
        X.append(x)
        Y.append(y)
    #Export to JSON (merging into an existing file when present)
    json_path = tools.sanitize(device.name) + '.json' if not json_path else json_path
    if os.path.isfile(json_path):
        json_data = json.load(open(json_path, 'r'))
    else:
        json_data = {}
        json_data["version"] = "1.0"   # version stamp only written for new files
    operation_name = operation.__name__
    if operation_name not in json_data:
        json_data[operation_name] = {}
    json_data[operation_name]['float32'] = {}
    D = json_data[operation_name]['float32']
    if len(profiles) > 1:
        # Serialize the trained forest: one dict of flat arrays per estimator
        clf, nrmse = model.train(X, Y, profiles)
        D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
                           'children_right': e.tree_.children_right.tolist(),
                           'threshold': e.tree_.threshold.astype('float64').tolist(),
                           'feature': e.tree_.feature.astype('float64').tolist(),
                           'value': e.tree_.value[:,:,0].astype('float64').tolist()} for e in clf.estimators_]
    D['profiles'] = [map(int, x) for x in profiles]
    json.dump(json_data, open(json_path,'w'))