From 3d4c9cf1e29a5a6e762701a9d0e4686807772b95 Mon Sep 17 00:00:00 2001
From: Philippe Tillet
Date: Thu, 16 Jul 2015 14:26:21 -0400
Subject: [PATCH] Tuning: Added AlexNet sizes for GEMM

---
 lib/backend/templates/gemm.cpp |  4 ++--
 tune/model.py                  |  7 ++++---
 tune/tune.py                   | 24 ++++++++++++++++++++++--
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/lib/backend/templates/gemm.cpp b/lib/backend/templates/gemm.cpp
index 8cf141e9e..44fd5f155 100644
--- a/lib/backend/templates/gemm.cpp
+++ b/lib/backend/templates/gemm.cpp
@@ -51,8 +51,8 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
   if(p_.A_fetching_policy!=FETCH_FROM_LOCAL || p_.B_fetching_policy!=FETCH_FROM_LOCAL)
     throw operation_not_supported_exception("Only local memory is supported for GEMM");
 
-  if(p_.depth > 1 && M*N*p_.depth > 2e6)
-    throw operation_not_supported_exception("This would necessitate a temporary larger than 1MB");
+//  if(p_.depth > 1 && M*N*p_.depth > 2e6)
+//    throw operation_not_supported_exception("This would necessitate a temporary larger than 1MB");
 
   if ((p_.mS % p_.simd_width) > 0 || (p_.nS % p_.simd_width) > 0)
     return TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE;
diff --git a/tune/model.py b/tune/model.py
index 11cbdcb35..3961226ee 100644
--- a/tune/model.py
+++ b/tune/model.py
@@ -31,9 +31,10 @@ def train(X, Y, profiles):
     Y = Y[p,:]
 
     #Train the model
-    cut = int(0.9*M)
-    XTr, YTr = X[:cut,:], Y[:cut,:]
-    XCv, YCv = X[cut:,:], Y[cut:,:]
+    cut = int(1.00*M)
+    CV = .1
+    XTr, YTr = X[:,:], Y[:,:]
+    XCv, YCv = X[:max(1,int(CV*M)),:], Y[:max(1,int(CV*M)),:]
 
     nrmses = {}
     for N in range(1,min(M+1,20)):
diff --git a/tune/tune.py b/tune/tune.py
index c33a9e0fb..3af5cfd1f 100644
--- a/tune/tune.py
+++ b/tune/tune.py
@@ -30,9 +30,28 @@ def tune(device, operation, json_path):
         sizes[isc.templates.gemm_tn] = sizes[isc.templates.gemm_nn]
         sizes[isc.templates.gemm_nt] = sizes[isc.templates.gemm_nn]
         sizes[isc.templates.gemm_tt] = sizes[isc.templates.gemm_nn]
+
+        #AlexNet sizes
+        sizes[isc.templates.gemm_nn] = [(3025,96,363),
+                                        (729,128,1200),
+                                        (169,384,2304),
+                                        (169,192,1728),
+                                        (169,128,1728)]
+
+        sizes[isc.templates.gemm_nt] = [(169,1728,128),
+                                        (169,1728,192),
+                                        (169,2304,384),
+                                        (729,1200,128)]
+
+        sizes[isc.templates.gemm_tn] = [(1728,128,169),
+                                        (1728,192,169),
+                                        (2304,384,169),
+                                        (1200,128,729),
+                                        (363,96,3025)]
+
+
     sizes = unique(list(sizes[operation]))
     sizes = [x for x in sizes if 1e-4 <= tools.memory_footprint(operation, x) <= 1e-1]
-    sizes = [(1536,1536,1536)]
 
 
     #Training data
@@ -58,7 +77,8 @@ def tune(device, operation, json_path):
         best = (-predperf).argsort()[:5]
         perf = [performance(x, tools.benchmark(operation, profiles[b], tree)) for b in best]
         predicted = profiles[best[argmax(perf)]]
-        tune = not optimize.is_local_optimum(predicted, operation, x, context)
+        #tune = not optimize.is_local_optimum(predicted, operation, x, context)
+        tune = True
         #Retune if necessary
         if tune:
             #new = optimize.exhaustive(operation, x, context)
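Note (illustration, not part of the patch): the gemm.cpp hunk disables the guard that rejected any depth-split profile whose temporary reduction buffer would exceed 2e6 elements. The deep, narrow AlexNet shapes added below are precisely the ones where splitting along K (p_.depth > 1) pays off, so the guard would have blocked the profiles the tuner needs to explore. Note also that 2e6 float32 elements is roughly 8 MB, so the "1MB" in the old message already understated the bound. A minimal sketch of the footprint arithmetic, with depth = 64 as a hypothetical tuner choice, not a value taken from the patch:

    # Scratch needed by a depth-split (split-K) GEMM: each of the `depth`
    # slices writes a partial M x N product that is reduced afterwards,
    # so the temporary holds M * N * depth elements.
    M, N, depth = 363, 96, 64       # AlexNet TN shape from tune/tune.py; depth is hypothetical
    elements = M * N * depth        # 2,230,272 partial-product entries
    print(elements > 2e6)           # True -> the old guard would have rejected this profile
    print(elements * 4 / 2**20)     # ~8.5 MiB of float32 scratch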
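Note (illustration, not part of the patch): the tune/model.py hunk changes the train/validation split. Before, the model trained on the first 90% of the (shuffled) rows and validated on the held-out 10%; after, it trains on all rows and validates on the first max(1, 0.1*M) of those same rows, so the validation error becomes an in-sample estimate. It still serves to pick the tree count N, but it no longer measures generalization. A sketch of the two splits, with stand-in data in place of the tuner's X and Y:

    import numpy as np

    X = np.random.rand(100, 8)      # stand-in for the tuner's feature matrix
    Y = np.random.rand(100, 4)      # stand-in for the measured performance
    M = X.shape[0]

    # Old split: held-out validation set.
    cut = int(0.9 * M)
    XTr_old, XCv_old = X[:cut, :], X[cut:, :]

    # New split: train on everything, validate on a 10% prefix of the same rows.
    CV = 0.1
    ncv = max(1, int(CV * M))       # int(): slice bounds must be integers
    XTr_new, XCv_new = X, X[:ncv, :]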
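Note (illustration, not part of the patch): the hard-coded triples in tune/tune.py map one-to-one onto AlexNet's five convolution layers lowered to GEMM via im2col. The gemm_nn shapes are the forward passes, the gemm_nt shapes the gradients with respect to the layer inputs (conv1 needs none, which is presumably why that list has only four entries), and the gemm_tn shapes the gradients with respect to the filters. A sketch of the derivation, assuming the layer geometry of the original AlexNet paper; conv2/4/5 use two groups, so the channel counts below are per group:

    # Forward conv as C[M x N] = A[M x K] * B[K x N] after im2col:
    #   M = H_out * W_out, N = output channels, K = kH * kW * input channels.
    layers = [
        # name,   H_out, kernel, cin/group, cout/group
        ('conv1', 55,    11,       3,        96),
        ('conv2', 27,     5,      48,       128),
        ('conv3', 13,     3,     256,       384),
        ('conv4', 13,     3,     192,       192),
        ('conv5', 13,     3,     192,       128),
    ]
    for name, h, k, cin, cout in layers:
        M, N, K = h * h, cout, k * k * cin
        print(name, 'NN, fprop:          ', (M, N, K))  # conv1 -> (3025, 96, 363)
        print(name, 'NT, bprop to data:  ', (M, K, N))  # conv2 -> (729, 1200, 128)
        print(name, 'TN, bprop to filter:', (K, N, M))  # conv1 -> (363, 96, 3025)

Every triple in the patch appears in this enumeration, with only conv1's NT shape left out, as noted above.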