Tuning: Added AlexNet sizes for GEMM

Philippe Tillet
2015-07-16 14:26:21 -04:00
parent 1e3c853b58
commit 3d4c9cf1e2
3 changed files with 28 additions and 7 deletions

@@ -51,8 +51,8 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
   if(p_.A_fetching_policy!=FETCH_FROM_LOCAL || p_.B_fetching_policy!=FETCH_FROM_LOCAL)
     throw operation_not_supported_exception("Only local memory is supported for GEMM");
-  if(p_.depth > 1 && M*N*p_.depth > 2e6)
-    throw operation_not_supported_exception("This would necessitate a temporary larger than 1MB");
+  // if(p_.depth > 1 && M*N*p_.depth > 2e6)
+  //   throw operation_not_supported_exception("This would necessitate a temporary larger than 1MB");
   if ((p_.mS % p_.simd_width) > 0 || (p_.nS % p_.simd_width) > 0)
     return TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE;
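
For context on the guard this hunk comments out: when the reduction dimension is split (p_.depth > 1), each of the depth slices accumulates its own M-by-N partial result, so the scratch buffer grows as M*N*depth elements, which is exactly the quantity the disabled check bounds. A minimal sketch of that arithmetic, assuming 4-byte float elements; temporary_bytes is an illustrative helper, not a function from this repository:

def temporary_bytes(M, N, depth, elem_size=4):
    # A split-K GEMM keeps `depth` separate MxN partial products alive
    # until the final reduction, hence M*N*depth scratch elements.
    return M * N * depth * elem_size

# One of the AlexNet shapes added below, with an aggressive split:
M, N, depth = 3025, 96, 8
print(M * N * depth)                 # 2323200 elements, over the old 2e6 cutoff
print(temporary_bytes(M, N, depth))  # 9292800 bytes of scratch

Disabling the guard presumably lets such deep-split profiles be considered for the AlexNet sizes introduced in the tuner below.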

@@ -31,9 +31,10 @@ def train(X, Y, profiles):
   Y = Y[p,:]
   #Train the model
-  cut = int(0.9*M)
-  XTr, YTr = X[:cut,:], Y[:cut,:]
-  XCv, YCv = X[cut:,:], Y[cut:,:]
+  cut = int(1.00*M)
+  CV = .1
+  XTr, YTr = X[:,:], Y[:,:]
+  XCv, YCv = X[:max(1,CV*M),:], Y[:max(1,CV*M),:]
   nrmses = {}
   for N in range(1,min(M+1,20)):
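
The hunk above replaces a disjoint 90/10 train/cross-validation split with training on all M samples and validating on the first 10% of rows (at least one), which now overlap the training set. A minimal sketch of both schemes with made-up shapes; note that the committed CV*M slice bound is a float, which only older NumPy accepted, so the sketch casts it to int:

import numpy as np

X, Y = np.random.rand(50, 8), np.random.rand(50, 4)
M = X.shape[0]

# Before: hold out the last 10% of rows for cross-validation.
cut = int(0.9 * M)
XTr_old, XCv_old = X[:cut, :], X[cut:, :]

# After: train on every row, validate on the first 10% (at least one row).
CV = 0.1
XTr_new = X[:, :]
XCv_new = X[:max(1, int(CV * M)), :]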

@@ -30,9 +30,28 @@ def tune(device, operation, json_path):
   sizes[isc.templates.gemm_tn] = sizes[isc.templates.gemm_nn]
   sizes[isc.templates.gemm_nt] = sizes[isc.templates.gemm_nn]
   sizes[isc.templates.gemm_tt] = sizes[isc.templates.gemm_nn]
+  #AlexNet sizes
+  sizes[isc.templates.gemm_nn] = [(3025,96,363),
+                                  (729,128,1200),
+                                  (169,384,2304),
+                                  (169,192,1728),
+                                  (169,128,1728)]
+  sizes[isc.templates.gemm_nt] = [(169,1728,128),
+                                  (169,1728,192),
+                                  (169,2304,384),
+                                  (729,1200,128)]
+  sizes[isc.templates.gemm_tn] = [(1728,128,169),
+                                  (1728,192,169),
+                                  (2304,384,169),
+                                  (1200,128,729),
+                                  (363,96,3025)]
   sizes = unique(list(sizes[operation]))
   sizes = [x for x in sizes if 1e-4 <= tools.memory_footprint(operation, x) <= 1e-1]
-  sizes = [(1536,1536,1536)]
   #Training data
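
The tuples added in this hunk are AlexNet's convolutions lowered to GEMM via im2col, where M is the number of output pixels, N the number of filters, and K the size of one input patch (input channels x kernel height x kernel width). A sketch of that correspondence under those assumptions (conv_as_gemm is illustrative, not part of the tuner); the NT/TN lists plausibly cover the transposed products of the same layers' backward passes:

def conv_as_gemm(h_out, w_out, n_filters, c_in, kh, kw):
    # im2col turns a convolution into C = A.B with
    # M = output pixels, N = filters, K = elements per input patch.
    return (h_out * w_out, n_filters, c_in * kh * kw)

print(conv_as_gemm(55, 55, 96, 3, 11, 11))   # (3025, 96, 363)  -> conv1
print(conv_as_gemm(27, 27, 128, 48, 5, 5))   # (729, 128, 1200) -> conv2, one group
print(conv_as_gemm(13, 13, 384, 256, 3, 3))  # (169, 384, 2304) -> conv3
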
@@ -58,7 +77,8 @@ def tune(device, operation, json_path):
   best = (-predperf).argsort()[:5]
   perf = [performance(x, tools.benchmark(operation, profiles[b], tree)) for b in best]
   predicted = profiles[best[argmax(perf)]]
-  tune = not optimize.is_local_optimum(predicted, operation, x, context)
+  #tune = not optimize.is_local_optimum(predicted, operation, x, context)
+  tune = True
   #Retune if necessary
   if tune:
     #new = optimize.exhaustive(operation, x, context)
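
Two changes of behaviour sit in this last hunk. The unchanged lines keep the predict-then-verify shortlist: the regression model scores every profile, only the five highest-ranked are actually benchmarked, and the measured best wins. The edited lines then force tune = True, so the tuner always retunes rather than skipping sizes where the predicted profile already passes the is_local_optimum test. A minimal sketch of the shortlist step with made-up numbers; benchmark below is a noisy stand-in, not the repository's tools.benchmark:

import numpy as np

predperf = np.array([0.8, 2.4, 1.1, 3.0, 0.2, 2.9])  # model-predicted performance

def benchmark(profile):
    # Stand-in measurement: the prediction plus some noise.
    return predperf[profile] + np.random.normal(scale=0.1)

# Shortlist the 5 profiles with the best predictions, measure only those,
# and keep the one that actually ran fastest.
best = (-predperf).argsort()[:5]
perf = [benchmark(b) for b in best]
predicted = best[int(np.argmax(perf))]
print(best, predicted)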