Files
triton/python/autotune/pysrc/model.py

41 lines
1.4 KiB
Python
Raw Normal View History

from sklearn import tree
from sklearn import ensemble
2014-10-14 23:49:18 -04:00
from numpy import array, bincount, mean, std, max, argmax, min, argmin, median
def gmean(a, axis=0, dtype=None):
    """Return the geometric mean of ``a`` along ``axis``.

    The result is computed as ``exp(mean(log(a)))``.

    Args:
        a: Input values. Anything that is not already a ``numpy.ndarray``
            is converted with ``numpy.array(a, dtype=dtype)``.
        axis: Axis along which the mean of the logarithms is taken.
        dtype: Optional dtype the input is converted to before taking the
            logarithm. When ``None`` an ndarray input is used as-is.

    Returns:
        ``exp`` of the mean of ``log(a)`` along ``axis``.
    """
    # Imported locally: the module only does ``from numpy import ...``, so
    # the ``np`` alias used below is otherwise undefined (NameError).
    import numpy as np

    if not isinstance(a, np.ndarray):
        # Not an ndarray: attempt to convert it.
        log_a = np.log(np.array(a, dtype=dtype))
    elif dtype is not None:
        # Explicit ``is not None``: a plain ``np.dtype`` instance is falsy
        # (its ``len()`` is 0), so a truthiness test would ignore it.
        if isinstance(a, np.ma.MaskedArray):
            # Keep the mask while changing the dtype.
            log_a = np.log(np.ma.asarray(a, dtype=dtype))
        else:
            log_a = np.log(np.asarray(a, dtype=dtype))
    else:
        log_a = np.log(a)
    return np.exp(log_a.mean(axis=axis))
def train_model(X, Y, profiles, metric):
    """Fit a random-forest regressor on (X, Y) and print speedup statistics.

    X is normalised with a single scalar mean and standard deviation taken
    over all of its entries, and Y is divided by its overall maximum. The
    first ``int(0.8 * n_rows + 1)`` rows are used for fitting; the remaining
    rows are used to report speedups relative to a reference column.

    Args:
        X: 2-D array of inputs, one row per sample.
        Y: 2-D array of targets, one row per sample and one column per
            candidate; the per-row ``argmin`` is treated as the best column.
        profiles: Not used by this function body.
        metric: Not used by this function body.

    Returns:
        None. The statistics are written to stdout.
    """
    print("Building the model...")

    # ``mean``/``std``/``max``/``min`` are the numpy functions imported at
    # file level (they shadow the builtins), so they reduce over all entries.
    x_offset = mean(X)
    x_scale = std(X)
    features = (X - x_offset)/x_scale

    targets = Y[:, :]
    targets = targets/max(targets)

    # Reference column: the one that is the row-wise minimum most often.
    best_per_row = argmin(targets, axis=1)
    ref = argmax(bincount(best_per_row))  # most common profile

    # Leading rows are fitted on, trailing rows are held out.
    cut = int(0.800*features.shape[0]+1)
    fit_rows = slice(None, cut)
    held_out = slice(cut, None)

    # Train the model
    forest = ensemble.RandomForestRegressor(10, max_depth=10)
    clf = forest.fit(features[fit_rows, :], targets[fit_rows, :])

    y_test = targets[held_out, :]

    def ratios_vs_reference(choices):
        # Ratio of the reference column to the chosen column, row by row.
        return array([row[ref]/row[pick] for row, pick in zip(y_test, choices)])

    # Columns picked by the model versus the truly best columns.
    predicted_choice = argmin(clf.predict(features[held_out, :]), axis=1)
    testing = ratios_vs_reference(predicted_choice)
    oracle_choice = argmin(y_test, axis=1)
    optimal = ratios_vs_reference(oracle_choice)

    testing_stats = (gmean(testing), median(testing), min(testing), max(testing))
    print("Testing speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%testing_stats)
    optimal_stats = (gmean(optimal), median(optimal), min(optimal), max(optimal))
    print("Optimal speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%optimal_stats)