Philippe Tillet
2014-10-01 04:44:16 +02:00
parent 3523a3756f
commit 2f6d41f661
2 changed files with 79 additions and 84 deletions


@@ -17,7 +17,7 @@ def resample(X, tbincount, densities, step):
     r = random.random()
     while(True):
         if(len(tbincount)==0 or len(densities)==0 or r<=1.0/len(densities)):
-            x = np.array([step*random.randint(1,40), step*random.randint(1,40), step*random.randint(1,40)]);
+            x = np.array([step*random.randint(1,40), step*random.randint(1,40), step*random.randint(1,40)])
         else:
             probs = [1.0/x if x>0 else 0 for x in tbincount]
             distr = np.random.choice(range(tbincount.size), p = probs/np.sum(probs))
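
Note: the else branch above draws a profile index with probability inversely proportional to how often each profile has been seen so far, biasing exploration toward under-represented profiles. A minimal standalone sketch of that weighting (the counts here are made up for illustration):

    import numpy as np

    tbincount = np.array([8, 2, 0, 4])  # hypothetical: how often each profile was chosen so far
    probs = np.array([1.0/c if c > 0 else 0 for c in tbincount])
    idx = np.random.choice(tbincount.size, p=probs/np.sum(probs))
    # a profile seen 2 times is 4x likelier to be drawn than one seen 8 times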
@@ -28,67 +28,46 @@ def resample(X, tbincount, densities, step):
     return x.astype(int)
 
 def generate_dataset(TemplateType, execution_handler):
-    I = 0
+    I = 10
     step = 64
-    max_size = 4000
     path = "./data"
-    #Tries to resume
-    try:
-        X = np.loadtxt(open(os.path.join(path, "X.csv"),"rb"))
-        t = np.loadtxt(open(os.path.join(path, "t.csv"),"rb"))
-        profiles = np.loadtxt(open(os.path.join(path, "profiles.csv"),"rb")).tolist()
-        if not isinstance(profiles[0], list):
-            profiles = [profiles]
-        N = t.size
-        X.resize((N+I, 3), refcheck=False)
-        t.resize(N+I, refcheck=False)
-        print 'Resuming dataset generation...'
-    except:
-        X = np.empty((I,I))
-        t = np.empty(I)
-        profiles = []
-        N = 0
-        pass
-
-    #Generates new data
-    print "Getting some good profiles..."
-    densities = [KernelDensity(kernel='gaussian', bandwidth=2*step).fit(X[t==i,:]) for i in range(int(max(t))+1)] if N else [];
-    X.resize((N+I, 3), refcheck=False)
-    t.resize(N+I, refcheck=False)
-
-    for i in range(I):
-        tbincount = np.bincount(t[0:i+1].astype(int))
-        x = resample(X, tbincount, densities, step)
-        y = execution_handler(x)
-        if y not in profiles:
-            profiles.append(y)
-            densities.append(KernelDensity(kernel='gaussian', bandwidth=2*step))
-        idx = profiles.index(y)
-        X[N+i,:] = x
-        t[N+i] = idx
-        densities[idx].fit(X[t[0:N+i+1]==idx,:])
-        np.savetxt(os.path.join(path,"X.csv"), X)
-        np.savetxt(os.path.join(path,"t.csv"), t)
-        np.savetxt(os.path.join(path,"profiles.csv"), profiles)
-    print "Generating the dataset..."
-    N = 500
-    Y = np.empty((N, len(profiles)))
-    X = np.empty((N,3))
-    t = []
-    for i in range(N):
-        x = resample(X, np.bincount(t), densities, step)
-        for j,y in enumerate(profiles):
-            T = execution_handler(x, os.devnull, decode(map(int, y)))
-            Y[i,j] = 2*1e-9*x[0]*x[1]*x[2]/T
-        idx = np.argmax(Y[i,:])
-        X[i,:] = x
-        t = np.argmax(Y[:i+1,], axis=1)
-        densities[idx].fit(X[t==idx,:])
-    np.savetxt(os.path.join(path,"Y.csv"), Y)
+    # print "Getting some good profiles..."
+    # X = np.empty((I, 3))
+    # t = np.empty(I)
+    # profiles = []
+    # for i in range(I):
+    #     x = resample(X, [], [], step)
+    #     y = execution_handler(x)
+    #     if y not in profiles:
+    #         profiles.append(y)
+    #     idx = profiles.index(y)
+    #     X[i,:] = x
+    #     t[i] = idx
+    # densities = [KernelDensity(kernel='gaussian', bandwidth=2*step).fit(X[t==i,:]) for i in range(int(max(t))+1)];
+    #
+    # print "Generating the dataset..."
+    # N = 1000
+    # Y = np.empty((N, len(profiles)))
+    # X = np.empty((N,3))
+    # t = []
+    #
+    # for i in range(N):
+    #     x = resample(X, np.bincount(t), densities, step)
+    #     for j,y in enumerate(profiles):
+    #         T = execution_handler(x, os.devnull, decode(map(int, y)))
+    #         Y[i,j] = 2*1e-9*x[0]*x[1]*x[2]/T
+    #     idx = np.argmax(Y[i,:])
+    #     X[i,:] = x
+    #     t = np.argmax(Y[:i+1,], axis=1)
+    #     densities[idx].fit(X[t==idx,:])
+    #
+    # np.savetxt(os.path.join(path,"profiles.csv"), profiles)
+    # np.savetxt(os.path.join(path,"X.csv"), X)
+    # np.savetxt(os.path.join(path,"Y.csv"), Y)
+    profiles = np.loadtxt(os.path.join(path,"profiles.csv"))
+    X = np.loadtxt(os.path.join(path,"X.csv"))
+    Y = np.loadtxt(os.path.join(path,"Y.csv"))
     return X, Y, profiles
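
Note: the line Y[i,j] = 2*1e-9*x[0]*x[1]*x[2]/T converts a measured runtime into GFLOP/s: a matrix product of shape (M, N, K) = (x[0], x[1], x[2]) costs roughly 2*M*N*K floating-point operations (one multiply plus one add per accumulated term). The same conversion as a helper, sketched under that reading (the function name is ours, not the repository's):

    def gemm_gflops(M, N, K, seconds):
        # 2*M*N*K flops, scaled by 1e-9 to giga-flops, divided by runtime
        return 2.0e-9 * M * N * K / seconds

    print gemm_gflops(1024, 1024, 1024, 0.010)  # ~214.7 GFLOP/s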


@@ -2,41 +2,57 @@ from sklearn import *;
 from sklearn import ensemble;
 import numpy as np
 import scipy as sp
+from pybrain.datasets import SupervisedDataSet
+from pybrain.tools.shortcuts import buildNetwork
+from pybrain.supervised.trainers import BackpropTrainer
+from pybrain.structure import LinearLayer, TanhLayer, SigmoidLayer, SoftmaxLayer, FeedForwardNetwork, BiasUnit
+from pybrain.tools.neuralnets import NNregression, Trainer
 
 def train_model(X, Y, profiles):
     #Preprocessing
-    scaler = preprocessing.StandardScaler().fit(X);
-    X = scaler.transform(X);
-    ref = np.argmax(np.bincount(np.argmax(Y, axis=1))) #most common profile
+    Xmean = np.mean(X, axis=0)
+    Xstd = np.std(X, axis=0)
+    X = (X - Xmean)/Xstd
+    Ymax = np.max(Y)
+    Y = Y/Ymax
+    ref = np.argmax(np.bincount(np.argmax(Y, axis=1))) #most common profile
 
     #Cross-validation data-sets
-    cut = int(0.5*X.shape[0]+1);
-    XTr = X[0:cut, :];
-    YTr = Y[0:cut, :];
-    XTe = X[cut:,:];
-    YTe = Y[cut:,:];
+    cut = int(0.1*X.shape[0]+1)
+    XTr = X[0:cut, :]
+    YTr = Y[0:cut, :]
+    XTe = X[cut:,:]
+    YTe = Y[cut:,:]
 
     #Train the model
-    print("Training the model...");
-    clf = linear_model.LinearRegression().fit(XTr,YTr);
+    print("Training the model...")
+    ds = SupervisedDataSet(X.shape[1], Y.shape[1])
+    for idx, x in enumerate(X):
+        ds.addSample(x, Y[idx,:])
+    clf = buildNetwork(*[X.shape[1], 100, Y.shape[1]], hiddenclass = TanhLayer, outclass = LinearLayer)
+    #print fnn;
+    #trainer = RPropMinusTrainer( fnn, dataset=ds, verbose=True);
+    trainer = BackpropTrainer( clf, dataset=ds, verbose=True, momentum=0.01, weightdecay=0.01, learningrate=0.002, batchlearning=False)
+    trainer.trainUntilConvergence(maxEpochs=100)
 
     #Evaluate the model
-    GFlops = np.empty(XTe.shape[0]);
-    speedups = np.empty(XTe.shape[0]);
-    optspeedups = np.empty(XTe.shape[0]);
+    GFlops = np.empty(XTe.shape[0])
+    speedups = np.empty(XTe.shape[0])
+    optspeedups = np.empty(XTe.shape[0])
     for i,x in enumerate(XTe):
-        predictions = clf.predict(x);
-        label = np.argmax(predictions);
-        speedups[i] = YTe[i,label]/YTe[i,ref];
-        optspeedups[i] = np.max(YTe[i,:])/YTe[i,ref];
-        GFlops[i] = YTe[i,ref];
+        predictions = clf.activate(x)
+        label = np.argmax(predictions)
+        # print YTe[i,label], YTe[i,ref], np.max(YTe[i,:])
+        speedups[i] = YTe[i,label]/YTe[i,ref]
+        optspeedups[i] = np.max(YTe[i,:])/YTe[i,ref]
+        GFlops[i] = YTe[i,ref]*Ymax
 
-    np.set_printoptions(precision=2);
-    print("-----------------");
-    print("Average testing speedup : %f (Optimal : %f)"%(sp.stats.gmean(speedups), sp.stats.gmean(optspeedups)));
-    print("Average GFLOP/s : %f (Default %f, Optimal %f)"%(np.mean(np.multiply(GFlops,speedups)), np.mean(GFlops), np.mean(np.multiply(GFlops,optspeedups))));
-    print("Minimum speedup is %f wrt %i GFlops"%(np.min(speedups), GFlops[np.argmin(speedups)]));
-    print("Maximum speedup is %f wrt %i GFlops"%(np.max(speedups), GFlops[np.argmax(speedups)]));
-    print("--------");
+    np.set_printoptions(precision=2)
+    print("-----------------")
+    print("Average testing speedup : %f (Optimal : %f)"%(sp.stats.gmean(speedups), sp.stats.gmean(optspeedups)))
+    print("Average GFLOP/s : %f (Default %f, Optimal %f)"%(np.mean(np.multiply(GFlops,speedups)), np.mean(GFlops), np.mean(np.multiply(GFlops,optspeedups))))
+    print("Minimum speedup is %f wrt %i GFlops"%(np.min(speedups), GFlops[np.argmin(speedups)]))
+    print("Maximum speedup is %f wrt %i GFlops"%(np.max(speedups), GFlops[np.argmax(speedups)]))
+    print("--------")
     print clf
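
Note: the evaluation aggregates per-sample speedups with sp.stats.gmean rather than an arithmetic mean, which is the right average for ratios: a 2x speedup and a 0.5x slowdown should cancel out. A tiny illustration with made-up ratios:

    import numpy as np
    from scipy import stats

    speedups = np.array([2.0, 0.5])
    print np.mean(speedups)      # 1.25 -- misleadingly suggests a 25% net gain
    print stats.gmean(speedups)  # 1.0  -- correctly reports no net change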