# File: triton/tune/regression.py
# Timestamp: 2017-05-07 16:51:51 -07:00
# 94 lines, 3.2 KiB, Python

import numpy as np
import keras as kr
import tensorflow as tf
import isaac as sc
import struct
from tools import ProgressBar, load
def train(prefix, OpType, X, y, nepochs = 100):
    """Fit a small dense regression network on log2-scaled features.

    The network stacks Dense layers of width 128, 64, 32, 16 and 8, each
    followed by a ReLU, then a single-unit Dense output with a ReLU. It is
    compiled with mean-squared-error loss and the RMSprop optimizer, trained
    with 10% of the data held out for validation, saved to
    ``<prefix>/model.hdf5`` and reloaded from that file before being returned.

    Args:
        prefix: Directory under which ``model.hdf5`` is written.
        OpType: Not used by this function; kept for a uniform signature.
        X: Feature matrix; ``np.log2`` is applied before training.
        y: Regression targets passed to ``model.fit``.
        nepochs: Number of training epochs.

    Returns:
        The Keras model reloaded from the saved file.
    """
    progress = ProgressBar('Training')
    model_path = '{}/model.hdf5'.format(prefix)

    def report_epoch(epoch, _logs):
        # Keras calls this with (epoch index, logs dict) after each epoch.
        progress.update(epoch, nepochs)

    # Release ISAAC's driver
    sc.driver.release()
    np.random.seed(0)
    # NOTE(review): the extent of this `with` block is assumed to run through
    # the return statement -- confirm against the canonical source.
    with tf.device('/cpu:0'):
        # Limit GPU memory usage
        memory_cap = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        session = tf.Session(config=tf.ConfigProto(gpu_options=memory_cap))
        kr.backend.set_session(session)

        # Features transformation
        X = np.log2(X)
        nfeatures = X.shape[1]

        # Model
        model = kr.models.Sequential()
        for width in (128, 64, 32, 16, 8):
            model.add(kr.layers.Dense(width, input_dim=nfeatures))
            model.add(kr.layers.Activation('relu'))
        model.add(kr.layers.Dense(1))
        model.add(kr.layers.Activation('relu'))
        model.compile(loss='mean_squared_error', optimizer='rmsprop')

        # Train
        epoch_reporter = kr.callbacks.LambdaCallback(on_epoch_end=report_epoch)
        model.fit(
            X,
            y,
            validation_split=0.1,
            batch_size=64,
            epochs=nepochs,
            verbose=0,
            callbacks=[epoch_reporter],
        )

        # Round-trip through disk: the returned model is the reloaded one.
        model.save(model_path)
        return kr.models.load_model(model_path)
def maximize(OpType, device, model, shapes, V, max_evals=100):
    """Benchmark the candidates the model ranks highest and return the best.

    Every row of ``V`` is combined with ``shapes`` into a full parameter
    vector, invalid vectors are discarded via ``OpType.check_valid``, and the
    remainder are ranked by ``model.predict`` on their log2 values. Candidates
    are then benchmarked in decreasing order of predicted value until
    ``max_evals`` benchmarks have succeeded or the candidates run out.
    Candidates whose benchmark raises ``RuntimeError`` are skipped.

    Args:
        OpType: Operation class providing ``nparams``, ``nshape_params``,
            ``check_valid(device, X)`` and, once instantiated with a
            parameter vector, ``benchmark(ctx, stream)``.
        device: Device handle forwarded to ``OpType.check_valid``.
        model: Object whose ``predict`` returns one score per row as a 2-D
            column (the indexing below relies on that).
        shapes: Shape parameters written into the first
            ``OpType.nshape_params`` columns of every candidate.
        V: 2-D array with one row of tuning parameters per candidate.
        max_evals: Maximum number of successful benchmarks to run.

    Returns:
        A tuple ``(fmax, params)``: the largest measured benchmark value and
        the tuning parameters (columns after ``nshape_params``) that achieved
        it, as a ``np.uint32`` array.

    Raises:
        ValueError: If no candidate could be benchmarked.
    """
    # Build features: shape parameters first, tuning parameters after.
    X = np.zeros((V.shape[0], OpType.nparams), dtype=np.uint32)
    X[:, :OpType.nshape_params] = shapes
    X[:, OpType.nshape_params:] = V
    X = X[OpType.check_valid(device, X), :]

    # Model predictions; row indices of X sorted from best to worst prediction.
    predictions = model.predict(np.log2(X), batch_size=8192, verbose=0)
    pred_idxs = np.argsort(predictions, axis=0)[::-1]

    # Evaluate best predicted models
    ctx = sc.driver.default_context()
    stream = sc.driver.default_stream()
    perf, idx = [], []
    for pred_idx in pred_idxs:
        params = X[pred_idx, :][0].astype(int)
        try:
            y = OpType(params).benchmark(ctx, stream)
        except RuntimeError:
            # This configuration cannot run; try the next-best prediction.
            continue
        perf.append(y)
        idx.append(pred_idx)
        if len(perf) >= max_evals:
            break

    if not perf:
        raise ValueError('maximize: no candidate configuration could be benchmarked')

    # Return the actual best. `perf` and `idx` are appended together, so they
    # stay aligned even when failed candidates were skipped; indexing
    # `pred_idxs` by a position in `perf` would pick the wrong row.
    best = int(np.argmax(perf))
    fmax = np.max(perf)
    farg_max = X[idx[best], OpType.nshape_params:]
    return fmax, farg_max[0].astype(np.uint32)
def prune(prefix, OpType, device, model):
    """Collect the best tuning parameters for every benchmark shape.

    For each shape returned by ``OpType.bench_shapes(device)``, ``maximize``
    is called and the winning tuning parameters are recorded. Results are
    checkpointed to ``<prefix>/prune.npz`` after every shape so an interrupted
    run can pick up where it stopped.

    Args:
        prefix: Directory holding the ``prune.npz`` checkpoint.
        OpType: Operation class providing ``nparams``, ``nshape_params``,
            ``all_valid(device)`` and ``bench_shapes(device)``.
        device: Device handle forwarded to the ``OpType`` helpers.
        model: Trained model forwarded to ``maximize``.

    Returns:
        A ``np.uint32`` array of the distinct winning tuning-parameter rows,
        in the sorted order produced by ``np.unique``.
    """
    progress = ProgressBar('Pruning')

    # Restore progress
    # NOTE(review): `load` is presumed to return arrays with the requested
    # column counts (empty when no checkpoint exists) -- confirm in `tools`.
    path = '{}/prune.npz'.format(prefix)
    X, Y = load(path, [('X', OpType.nshape_params), ('Y', OpType.nparams - OpType.nshape_params)])
    Y = Y.astype(np.uint32)
    V = OpType.all_valid(device)

    # Update
    i = Y.shape[0]
    S = OpType.bench_shapes(device)
    nsamples = len(S)
    progress.update(i, nsamples)
    # Skip the shapes already covered by the checkpoint instead of
    # benchmarking them again from the start.
    for x in list(S)[i:]:
        _, y = maximize(OpType, device, model, x, V)
        X = np.vstack((X, x))
        Y = np.vstack((Y, y))
        i += 1
        # Persist every completed row, including the one just computed.
        np.savez(path, X=X, Y=Y)
        progress.update(i, nsamples)

    # Remove duplicates
    if Y.shape[0] == 0:
        return Y
    return np.unique(Y, axis=0)