from sklearn import *
from sklearn import ensemble
import numpy as np
import scipy as sp
import scipy.stats  # explicitly load the submodule so ``sp.stats`` is always available


def _standardize(X):
    """Return X with each column shifted to zero mean and scaled to unit std."""
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    # A constant column has zero deviation; dividing by it would yield NaN/inf
    # and make the regressor reject the data. Leave such columns centred at 0.
    std = np.where(std == 0, 1.0, std)
    return (X - mean) / std


def train_model(X, Y, profiles, metric):
    """Fit a random-forest regressor on (X, Y) and print hold-out speedup statistics.

    The rows are split in order (no shuffling): roughly the first 80% are used
    for training and the remainder for evaluation. For every held-out row the
    column of Y with the highest predicted value is selected, and its measured
    value is compared against the "reference" column, i.e. the column that is
    most often the row-wise maximum of Y over the whole data-set.

    Parameters
    ----------
    X : 2-D numpy array, one row per sample (indexed as ``X[a:b, :]``).
    Y : 2-D numpy array with the same number of rows as X, one column per
        candidate profile; larger values are treated as better.
    profiles : unused by this function; kept for interface compatibility.
    metric : label interpolated into the printed report (e.g. a unit name).

    Returns
    -------
    None. Results are reported through ``print`` only.

    Raises
    ------
    ValueError
        If the data-set is too small to leave at least one evaluation row.

    Notes
    -----
    Calls ``np.set_printoptions(precision=2)``, which changes NumPy's global
    print settings for the rest of the process.
    """
    # Preprocessing
    X = _standardize(X)
    Ymax = np.max(Y)
    Y = Y / Ymax
    # Most common best profile: used as the baseline for all speedups.
    ref = np.argmax(np.bincount(np.argmax(Y, axis=1)))

    # Cross-validation data-sets (ordered 80/20 split)
    cut = int(0.800 * X.shape[0] + 1)
    XTr, YTr = X[0:cut, :], Y[0:cut, :]
    XTe, YTe = X[cut:, :], Y[cut:, :]
    if XTe.shape[0] == 0:
        raise ValueError(
            "train_model needs at least 6 samples to hold out a test set "
            "(got %d)" % X.shape[0]
        )

    # Train the model
    print("Training the model...")
    clf = ensemble.RandomForestRegressor(n_estimators=40).fit(XTr, YTr)

    # Evaluate the model.
    # predict() requires a 2-D input, so score the whole test set in one call
    # instead of feeding 1-D rows one at a time.
    predictions = np.asarray(clf.predict(XTe))
    # With a single output column scikit-learn returns a 1-D array; restore
    # the (n_samples, n_outputs) layout so the per-row argmax is well defined.
    predictions = predictions.reshape(XTe.shape[0], -1)
    labels = np.argmax(predictions, axis=1)

    baseline = YTe[:, ref]
    speedups = YTe[np.arange(XTe.shape[0]), labels] / baseline
    optspeedups = np.max(YTe, axis=1) / baseline
    GFlops = baseline * Ymax  # undo the Ymax normalisation for reporting

    np.set_printoptions(precision=2)
    print("-----------------")
    print("Average testing speedup : %f (Optimal : %f)"%(sp.stats.gmean(speedups), sp.stats.gmean(optspeedups)))
    print("Average %s: %f (Default %f, Optimal %f)"%(metric, np.mean(np.multiply(GFlops,speedups)), np.mean(GFlops), np.mean(np.multiply(GFlops,optspeedups))))
    print("Minimum speedup is %f wrt %i %s"%(np.min(speedups), GFlops[np.argmin(speedups)], metric))
    print("Maximum speedup is %f wrt %i %s"%(np.max(speedups), GFlops[np.argmax(speedups)], metric))
    print("--------")