Tuning: Added AlexNet sizes for GEMM

2015-07-16 14:26:21 -04:00
parent 1e3c853b58
commit 3d4c9cf1e2
3 changed files with 28 additions and 7 deletions
--- a/lib/backend/templates/gemm.cpp
+++ b/lib/backend/templates/gemm.cpp
@@ -51,8 +51,8 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
    if(p_.A_fetching_policy!=FETCH_FROM_LOCAL || p_.B_fetching_policy!=FETCH_FROM_LOCAL)
      throw operation_not_supported_exception("Only local memory is supported for GEMM");
-    if(p_.depth > 1 && M*N*p_.depth > 2e6)
+//    if(p_.depth > 1 && M*N*p_.depth > 2e6)
-      throw operation_not_supported_exception("This would necessitate a temporary larger than 1MB");
+//      throw operation_not_supported_exception("This would necessitate a temporary larger than 1MB");
    if ((p_.mS % p_.simd_width) > 0 || (p_.nS % p_.simd_width) > 0)
      return TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE;
--- a/tune/model.py
+++ b/tune/model.py
@@ -31,9 +31,10 @@ def train(X, Y, profiles):
    Y = Y[p,:]   
    #Train the model
-    cut = int(0.9*M)
+    cut = int(1.00*M)
-    XTr, YTr = X[:cut,:], Y[:cut,:]
+    CV = .1
-    XCv, YCv = X[cut:,:], Y[cut:,:]
+    XTr, YTr = X[:,:], Y[:,:]
+    XCv, YCv = X[:max(1,CV*M),:], Y[:max(1,CV*M),:]
    nrmses = {}
    for N in range(1,min(M+1,20)):
--- a/tune/tune.py
+++ b/tune/tune.py
@@ -30,9 +30,28 @@ def tune(device, operation, json_path):
    sizes[isc.templates.gemm_tn]     = sizes[isc.templates.gemm_nn]
    sizes[isc.templates.gemm_nt]     = sizes[isc.templates.gemm_nn]
    sizes[isc.templates.gemm_tt]     = sizes[isc.templates.gemm_nn]
    #AlexNet sizes
    sizes[isc.templates.gemm_nn]	 = [(3025,96,363),
                                        (729,128,1200),
                                        (169,384,2304),
                                        (169,192,1728),
                                        (169,128,1728)]
    sizes[isc.templates.gemm_nt]	 = [(169,1728,128),
 										(169,1728,192),
 										(169,2304,384),
 										(729,1200,128)]
    sizes[isc.templates.gemm_tn]	 = [(1728,128,169), 
 										(1728,192,169),
 										(2304,384,169),
 										(1200,128,729),
 										(363,96,3025)]
    sizes = unique(list(sizes[operation]))
    sizes = [x for x in sizes if 1e-4 <= tools.memory_footprint(operation, x) <= 1e-1]
    sizes = [(1536,1536,1536)]
    #Training data
@@ -58,7 +77,8 @@ def tune(device, operation, json_path):
                best = (-predperf).argsort()[:5]
                perf = [performance(x, tools.benchmark(operation, profiles[b], tree)) for b in best]
                predicted = profiles[best[argmax(perf)]]
-            tune = not optimize.is_local_optimum(predicted, operation, x, context)     
+            #tune = not optimize.is_local_optimum(predicted, operation, x, context)     
+            tune = True
        #Retune if necessary
        if tune:
            #new = optimize.exhaustive(operation, x, context)