41 lines
1.4 KiB
Python
41 lines
1.4 KiB
Python
import numpy as np
from numpy import argmax, argmin, array, bincount, max, mean, median, min, std

from sklearn import ensemble
from sklearn import tree
|
|
|
|
|
|
def gmean(a, axis=0, dtype=None):
    """Compute the geometric mean of `a` along `axis`.

    Equivalent to ``exp(mean(log(a)))``; mirrors the classic
    ``scipy.stats.gmean`` implementation.

    Parameters
    ----------
    a : array_like
        Input values. Non-positive entries propagate ``nan``/``-inf``
        through ``np.log``, matching numpy semantics.
    axis : int, optional
        Axis along which the mean is taken (default 0).
    dtype : dtype, optional
        Type the input is converted to before taking logarithms.

    Returns
    -------
    float or ndarray
        Geometric mean along the requested axis.
    """
    if not isinstance(a, np.ndarray):  # if not an ndarray object attempt to convert it
        log_a = np.log(np.array(a, dtype=dtype))
    elif dtype:  # Must change the default dtype allowing array type
        # Preserve the mask when the input is a masked array.
        if isinstance(a, np.ma.MaskedArray):
            log_a = np.log(np.ma.asarray(a, dtype=dtype))
        else:
            log_a = np.log(np.asarray(a, dtype=dtype))
    else:
        # ndarray with its native dtype: take logs directly.
        log_a = np.log(a)
    return np.exp(log_a.mean(axis=axis))
|
|
|
|
def train_model(X, Y, profiles, metric):
    """Train a random-forest regressor mapping features to per-profile
    costs and report the test-set speedup over the most common profile.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Feature matrix; standardized locally (the caller's array is not
        mutated — rebinding creates a new array).
    Y : ndarray, shape (n_samples, n_profiles)
        Cost of each profile for each sample (lower is better).
    profiles, metric
        Unused here; kept for interface compatibility with callers.

    Returns
    -------
    sklearn.ensemble.RandomForestRegressor
        The fitted model.  (Bug fix: the trained model used to be
        discarded — the function returned ``None``.)
    """
    print("Building the model...")

    # Standardize features with a single scalar mean/std over the whole
    # matrix (not per-feature).
    # NOTE(review): these statistics include the held-out tail used for
    # testing below — mild train/test leakage; confirm it is intentional.
    Xmean = mean(X)
    Xstd = std(X)
    X = (X - Xmean) / Xstd

    # Scale targets by the global maximum cost.
    Ymax = max(Y)
    Y = Y / Ymax

    # Reference profile: the one most often the fastest across samples.
    ref = argmax(bincount(argmin(Y, axis=1)))

    # 80/20 split: the first 80% of rows train the model.
    cut = int(0.800 * X.shape[0] + 1)

    # Train the model (10 trees, depth-capped at 10).
    clf = ensemble.RandomForestRegressor(10, max_depth=10).fit(X[:cut, :], Y[:cut, :])

    # Speedup of the model's chosen profile vs. the reference profile,
    # and the oracle speedup from the true best profile.
    t = argmin(clf.predict(X[cut:, :]), axis=1)
    s = array([y[ref] / y[k] for y, k in zip(Y[cut:, :], t)])
    tt = argmin(Y[cut:, :], axis=1)
    ss = array([y[ref] / y[k] for y, k in zip(Y[cut:, :], tt)])

    print("Testing speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%(gmean(s), median(s), min(s), max(s)))
    print("Optimal speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%(gmean(ss), median(ss), min(ss), max(ss)))

    # Return the fitted predictor so callers can actually use it.
    return clf