Files
triton/tune/regression.py
2017-09-03 20:10:26 -04:00

103 lines
3.5 KiB
Python
Executable File

import numpy as np
import keras as kr
import tensorflow as tf
import isaac as sc
import struct
from keras import backend as K
from tools import ProgressBar, load
from keras.layers import Activation, Dense
def logit(x):
    """Return the log-odds ``log(x / (1 - x))`` of *x* using the Keras backend ``K``."""
    odds = x / (1 - x)
    return K.log(odds)
def train(prefix, OpType, X, y, nepochs = 100):
    """Fit a small dense regression network and return it reloaded from disk.

    The features are log2-transformed, a stack of Dense/ReLU layers
    (64, 32, 16, 8 units, then a single ReLU output) is trained with Adam on a
    mean-squared-error loss, the result is written to
    ``<prefix>/model.hdf5`` and the model loaded back from that file is
    returned.

    Args:
        prefix: directory in which ``model.hdf5`` is written.
        OpType: accepted for interface compatibility; not used in this function.
        X: 2-D feature array; ``np.log2`` is applied to it, so entries are
            presumably strictly positive (TODO confirm with callers).
        y: regression targets passed to ``model.fit``.
        nepochs: number of training epochs.

    Returns:
        The Keras model loaded from the saved file.
    """
    bar = ProgressBar('Training')
    checkpoint = '{}/model.hdf5'.format(prefix)
    # ISAAC's driver is released before the TensorFlow session is created.
    sc.driver.release()
    np.random.seed(0)

    def report_epoch(epoch, _):
        # Forward the epoch index reported by Keras to the progress bar.
        bar.update(epoch, nepochs)

    with tf.device('/cpu:0'):
        # Cap the fraction of GPU memory this process may claim.
        memory_cap = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        session = tf.Session(config=tf.ConfigProto(gpu_options=memory_cap))
        kr.backend.set_session(session)

        # Feature transformation: work in log2 space.
        X = np.log2(X)
        n_inputs = X.shape[1]

        # Network definition.
        net = kr.models.Sequential()
        for width in (64, 32, 16, 8):
            net.add(kr.layers.Dense(width, input_dim=n_inputs))
            net.add(kr.layers.Activation('relu'))
        net.add(kr.layers.Dense(1))
        net.add(kr.layers.Activation('relu'))
        net.compile(loss='mean_squared_error', optimizer='adam')

        # Training, with per-epoch progress reporting.
        epoch_hook = kr.callbacks.LambdaCallback(on_epoch_end = report_epoch)
        net.fit(X, y, validation_split=0.1, batch_size=16, epochs=nepochs,
                verbose=1, callbacks = [epoch_hook])

        # Round-trip through the saved file so the caller gets the persisted model.
        net.save(checkpoint)
        return kr.models.load_model(checkpoint)
def maximize(OpType, device, model, shapes, V):
    """Benchmark the best-predicted configurations and return the actual best.

    A feature matrix is built by pairing ``shapes`` with every row of ``V``,
    filtered through ``OpType.get_valid`` and scored with ``model``. The
    candidates are then benchmarked in decreasing order of predicted
    performance until 100 benchmarks have succeeded or no candidates remain.

    Args:
        OpType: operation class providing ``nparams``, ``nshape_params`` and
            ``get_valid``; ``OpType(params).benchmark(ctx, stream)`` is called
            for each candidate.
        device: forwarded to ``OpType.get_valid``.
        model: object whose ``predict`` scores the log2-transformed features;
            assumed to return one score per row with shape ``(n, 1)``
            (TODO confirm).
        shapes: values written to the first ``nshape_params`` columns of every
            candidate row.
        V: 2-D array with one candidate set of tuning parameters per row.

    Returns:
        ``(fmax, farg_max)``: the best measured performance and the tuning
        parameters (as ``np.uint32``) of the candidate that achieved it.

    Raises:
        ValueError: if no candidate could be benchmarked successfully.
    """
    #Build features
    X = np.zeros((V.shape[0], OpType.nparams), dtype=np.uint32)
    X[:, :OpType.nshape_params] = shapes
    X[:, OpType.nshape_params:] = V
    X = OpType.get_valid(device, X)
    #Model predictions
    with tf.device('/cpu:0'):
        predictions = model.predict(np.log2(X), batch_size=8192, verbose=0)
    # Candidate row indices ordered from best to worst predicted performance.
    pred_idxs = np.argsort(predictions, axis=0)[::-1]
    #Evaluate best predicted models
    ctx = sc.driver.default_context()
    stream = sc.driver.default_stream()
    perf, idx = [], []
    for pred_idx in pred_idxs:
        params = X[pred_idx, :][0].astype(int)
        try:
            y = OpType(params).benchmark(ctx, stream)
        except RuntimeError:
            # This configuration failed to benchmark; try the next candidate.
            continue
        #Update
        perf.append(y)
        idx.append(pred_idx)
        if len(perf) == 100:
            break
    if not perf:
        raise ValueError('maximize: no candidate configuration could be benchmarked')
    #Return the actual best
    best = int(np.argmax(perf))
    fmax = np.max(perf)
    # `perf` only holds successful runs, so its positions do not line up with
    # `pred_idxs` once any candidate has been skipped; `idx` records the row
    # that produced each measurement.
    farg_max = X[idx[best], OpType.nshape_params:]
    return fmax, farg_max[0].astype(np.uint32)
def prune(prefix, OpType, device, model):
    """Find the best tuning parameters for each benchmark shape.

    Progress is checkpointed to ``<prefix>/prune.npz`` after every shape, and
    a later call resumes after the shapes already stored there.

    Args:
        prefix: directory holding the ``prune.npz`` checkpoint.
        OpType: operation class providing ``nparams``, ``nshape_params``,
            ``all_valid`` and ``bench_shapes``.
        device: forwarded to ``OpType`` and to ``maximize``.
        model: performance model forwarded to ``maximize``.

    Returns:
        2-D array of the distinct tuning-parameter rows selected across all
        shapes, in the sorted order produced by ``np.unique``.
    """
    progress = ProgressBar('Pruning')
    #Restore progress
    path = '{}/prune.npz'.format(prefix)
    X, Y = load(path, [('X', OpType.nshape_params), ('Y', OpType.nparams - OpType.nshape_params)])
    Y = Y.astype(np.uint32)
    V = OpType.all_valid(device)
    #Update
    i = Y.shape[0]
    S = OpType.bench_shapes(device)
    nsamples = len(S)
    progress.update(i, nsamples)
    # Skip the shapes already covered by the checkpoint so that resuming
    # neither repeats benchmarks nor stores duplicate rows.
    for x in S[i:]:
        perf, y = maximize(OpType, device, model, x, V)
        print(x, perf)
        X = np.vstack((X, x))
        Y = np.vstack((Y, y))
        progress.update(i, nsamples)
        # Save everything gathered so far, including the row just added.
        np.savez(path, X = X, Y = Y)
        i += 1
    #Remove duplicates
    # np.unique takes the array directly (np.vstack requires a sequence, not a
    # set) and returns the rows in a deterministic order.
    Y = np.unique(Y, axis=0)
    return Y