import numpy as np
import scipy.stats
from sklearn import ensemble


def train_model(X, Y, profiles):
    # Preprocessing: standardize the features, normalize the performance labels
    Xmean = np.mean(X, axis=0)
    Xstd = np.std(X, axis=0)
    X = (X - Xmean) / Xstd
    Ymax = np.max(Y)
    Y = Y / Ymax
    # Reference profile: the one that is most frequently optimal
    ref = np.argmax(np.bincount(np.argmax(Y, axis=1)))

    # Cross-validation split: roughly 80% training, 20% testing
    cut = int(0.8 * X.shape[0]) + 1
    XTr, YTr = X[:cut, :], Y[:cut, :]
    XTe, YTe = X[cut:, :], Y[cut:, :]

    # Train the model
    print("Training the model...")
    clf = ensemble.RandomForestRegressor(n_estimators=40).fit(XTr, YTr)

    # Evaluate the model on the held-out set
    GFlops = np.empty(XTe.shape[0])
    speedups = np.empty(XTe.shape[0])
    optspeedups = np.empty(XTe.shape[0])
    for i, x in enumerate(XTe):
        # predict() expects a 2-D array; take the single row of predictions
        predictions = clf.predict(x.reshape(1, -1))[0]
        label = np.argmax(predictions)
        speedups[i] = YTe[i, label] / YTe[i, ref]
        optspeedups[i] = np.max(YTe[i, :]) / YTe[i, ref]
        GFlops[i] = YTe[i, ref] * Ymax

    np.set_printoptions(precision=2)
    print("-----------------")
    print("Average testing speedup : %f (Optimal : %f)"
          % (scipy.stats.gmean(speedups), scipy.stats.gmean(optspeedups)))
    print("Average GFLOP/s : %f (Default %f, Optimal %f)"
          % (np.mean(GFlops * speedups), np.mean(GFlops),
             np.mean(GFlops * optspeedups)))
    print("Minimum speedup is %f wrt %i GFlops"
          % (np.min(speedups), GFlops[np.argmin(speedups)]))
    # Index into the test set (XTe), not the full set, when reporting the
    # un-standardized features of the best case
    print("Maximum speedup is %f wrt %i GFlops for %s"
          % (np.max(speedups), GFlops[np.argmax(speedups)],
             XTe[np.argmax(speedups)] * Xstd + Xmean))
    print("--------")
    # Return the fitted model for downstream use
    return clf
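
# --- Usage sketch (illustrative; not part of the original source) ---
# A minimal example of driving train_model with synthetic data, assuming
# X holds one feature row per measured configuration and Y holds the
# measured GFLOP/s of every profile for that configuration. The shapes,
# the random data, and the `profiles` list below are all hypothetical.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    n_samples, n_features, n_profiles = 200, 6, 8
    X = rng.rand(n_samples, n_features)        # synthetic configuration features
    Y = rng.rand(n_samples, n_profiles) + 0.1  # synthetic per-profile GFLOP/s (kept positive)
    profiles = list(range(n_profiles))         # hypothetical profile identifiers
    clf = train_model(X, Y, profiles)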