Fixed over-head bug in the auto-tuner (not in the benchmarks)

2014-11-06 16:14:46 -05:00
parent 8dd492de23
commit 6595d20c34
9 changed files with 110 additions and 63 deletions
--- a/python/autotune/pysrc/model.py
+++ b/python/autotune/pysrc/model.py
@@ -1,5 +1,6 @@
 from sklearn import tree
 from sklearn import ensemble
+from sklearn.grid_search import GridSearchCV
 import numpy as np

 def gmean(a, axis=0, dtype=None):
@@ -20,7 +21,6 @@ def nrmse(y_ground, y):
    return rmsd/(np.max(y_ground) - np.min(y_ground))

 def train_model(X, Y, profiles, metric):
-    #Shuffle
    p = np.random.permutation(X.shape[0])
    X = X[p,:]
    Y = Y[p,:]
@@ -28,18 +28,34 @@ def train_model(X, Y, profiles, metric):
    Ymax = np.max(Y)
    Y = Y/Ymax
    #Train the model
-    cut = int(0.9*X.shape[0])
-    nrmses = {}
-    for depth in range(1,10):
-        clf = ensemble.RandomForestRegressor(5, max_depth=4).fit(X[:cut,:], Y[:cut,:])
-        t = np.argmin(clf.predict(X[cut:,:]), axis = 1)
-        y = np.array([Y[cut+i,t[i]] for i in range(t.size)])
-        y_ground = np.min(Y[cut:,:], axis=1)
-        # for i in range(t.size):
-        #     print X[cut+i,:], y[i], y_ground[i]
-        nrmses[clf] = nrmse(y_ground, y)
-        print depth, nrmses[clf]
+    cut = int(0.95*X.shape[0])

+    XTr, YTr = X[:cut,:], Y[:cut,:]
+    XCv, YCv = X[cut:,:], Y[cut:,:]
+
+    nrmses = {}
+    for N in range(1,10):
+        for depth in range(1,5):
+            clf = ensemble.RandomForestRegressor(N, max_depth=depth).fit(XTr, YTr)
+            t = np.argmin(clf.predict(XCv), axis = 1)
+            y = np.array([YCv[i,t[i]] for i in range(t.size)])
+            nrmses[clf] = nrmse(np.min(YCv[:,:], axis=1), y)
    clf = min(nrmses, key=nrmses.get)

+    t = np.argmin(clf.predict(XCv), axis = 1)
+    s = np.array([y[0]/y[k] for y,k in zip(YCv, t)])
+    tt = np.argmin(YCv, axis = 1)
+    ss = np.array([y[0]/y[k] for y,k in zip(YCv, tt)])
+
+    p5 = lambda a: np.percentile(a, 5)
+    p25 = lambda a: np.percentile(a, 25)
+    p50 = lambda a: np.percentile(a, 50)
+    p75 = lambda a: np.percentile(a, 75)
+    p95 = lambda a: np.percentile(a, 95)
+
+    print("Percentile     :\t 5 \t 25 \t 50 \t 75 \t 95")
+    print("Testing speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(s), p25(s), p50(s), p75(s), p95(s)))
+    print("Optimal speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(ss), p25(ss), p50(ss), p75(ss), p95(ss)))
+
+    print clf
    return clf