From 3d4c9cf1e29a5a6e762701a9d0e4686807772b95 Mon Sep 17 00:00:00 2001
From: Philippe Tillet
Date: Thu, 16 Jul 2015 14:26:21 -0400
Subject: [PATCH] Tuning: Added AlexNet sizes for GEMM

---
 lib/backend/templates/gemm.cpp |  4 ++--
 tune/model.py                  |  7 ++++---
 tune/tune.py                   | 24 ++++++++++++++++++++++--
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/lib/backend/templates/gemm.cpp b/lib/backend/templates/gemm.cpp
index 8cf141e9e..44fd5f155 100644
--- a/lib/backend/templates/gemm.cpp
+++ b/lib/backend/templates/gemm.cpp
@@ -51,8 +51,8 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
   if(p_.A_fetching_policy!=FETCH_FROM_LOCAL || p_.B_fetching_policy!=FETCH_FROM_LOCAL)
     throw operation_not_supported_exception("Only local memory is supported for GEMM");
 
-  if(p_.depth > 1 && M*N*p_.depth > 2e6)
-    throw operation_not_supported_exception("This would necessitate a temporary larger than 1MB");
+//  if(p_.depth > 1 && M*N*p_.depth > 2e6)
+//    throw operation_not_supported_exception("This would necessitate a temporary larger than 1MB");
 
   if ((p_.mS % p_.simd_width) > 0 || (p_.nS % p_.simd_width) > 0)
     return TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE;
diff --git a/tune/model.py b/tune/model.py
index 11cbdcb35..3961226ee 100644
--- a/tune/model.py
+++ b/tune/model.py
@@ -31,9 +31,10 @@ def train(X, Y, profiles):
     Y = Y[p,:]
 
     #Train the model
-    cut = int(0.9*M)
-    XTr, YTr = X[:cut,:], Y[:cut,:]
-    XCv, YCv = X[cut:,:], Y[cut:,:]
+    cut = int(1.00*M)
+    CV = .1
+    XTr, YTr = X[:,:], Y[:,:]
+    XCv, YCv = X[:max(1,int(CV*M)),:], Y[:max(1,int(CV*M)),:]
 
     nrmses = {}
     for N in range(1,min(M+1,20)):
diff --git a/tune/tune.py b/tune/tune.py
index c33a9e0fb..3af5cfd1f 100644
--- a/tune/tune.py
+++ b/tune/tune.py
@@ -30,9 +30,28 @@ def tune(device, operation, json_path):
         sizes[isc.templates.gemm_tn] = sizes[isc.templates.gemm_nn]
         sizes[isc.templates.gemm_nt] = sizes[isc.templates.gemm_nn]
         sizes[isc.templates.gemm_tt] = sizes[isc.templates.gemm_nn]
+
+        #AlexNet sizes
+        sizes[isc.templates.gemm_nn] = [(3025,96,363),
+                                        (729,128,1200),
+                                        (169,384,2304),
+                                        (169,192,1728),
+                                        (169,128,1728)]
+
+        sizes[isc.templates.gemm_nt] = [(169,1728,128),
+                                        (169,1728,192),
+                                        (169,2304,384),
+                                        (729,1200,128)]
+
+        sizes[isc.templates.gemm_tn] = [(1728,128,169),
+                                        (1728,192,169),
+                                        (2304,384,169),
+                                        (1200,128,729),
+                                        (363,96,3025)]
+
+
     sizes = unique(list(sizes[operation]))
     sizes = [x for x in sizes if 1e-4 <= tools.memory_footprint(operation, x) <= 1e-1]
-    sizes = [(1536,1536,1536)]
 
 
     #Training data
@@ -58,7 +77,8 @@ def tune(device, operation, json_path):
         best = (-predperf).argsort()[:5]
         perf = [performance(x, tools.benchmark(operation, profiles[b], tree)) for b in best]
         predicted = profiles[best[argmax(perf)]]
-        tune = not optimize.is_local_optimum(predicted, operation, x, context)
+        #tune = not optimize.is_local_optimum(predicted, operation, x, context)
+        tune = True
         #Retune if necessary
         if tune:
             #new = optimize.exhaustive(operation, x, context)
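Note (illustration, not part of the patch): the gemm.cpp hunk disables the guard that rejected any depth-split profile whose temporary reduction buffer would exceed 2e6 elements. The deep, narrow AlexNet shapes added below are precisely the ones where splitting along K (p_.depth > 1) pays off, so the guard would have blocked the profiles the tuner needs to explore. Note also that 2e6 float32 elements is roughly 8 MB, so the "1MB" in the old message already understated the bound. A minimal sketch of the footprint arithmetic, with depth = 64 as a hypothetical tuner choice, not a value taken from the patch:

    # Scratch needed by a depth-split (split-K) GEMM: each of the `depth`
    # slices writes a partial M x N product that is reduced afterwards,
    # so the temporary holds M * N * depth elements.
    M, N, depth = 363, 96, 64       # AlexNet TN shape from tune/tune.py; depth is hypothetical
    elements = M * N * depth        # 2,230,272 partial-product entries
    print(elements > 2e6)           # True -> the old guard would have rejected this profile
    print(elements * 4 / 2**20)     # ~8.5 MiB of float32 scratch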
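Note (illustration, not part of the patch): the tune/model.py hunk changes the train/validation split. Before, the model trained on the first 90% of the (shuffled) rows and validated on the held-out 10%; after, it trains on all rows and validates on the first max(1, 0.1*M) of those same rows, so the validation error becomes an in-sample estimate. It still serves to pick the tree count N, but it no longer measures generalization. A sketch of the two splits, with stand-in data in place of the tuner's X and Y:

    import numpy as np

    X = np.random.rand(100, 8)      # stand-in for the tuner's feature matrix
    Y = np.random.rand(100, 4)      # stand-in for the measured performance
    M = X.shape[0]

    # Old split: held-out validation set.
    cut = int(0.9 * M)
    XTr_old, XCv_old = X[:cut, :], X[cut:, :]

    # New split: train on everything, validate on a 10% prefix of the same rows.
    CV = 0.1
    ncv = max(1, int(CV * M))       # int(): slice bounds must be integers
    XTr_new, XCv_new = X, X[:ncv, :]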
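Note (illustration, not part of the patch): the hard-coded triples in tune/tune.py map one-to-one onto AlexNet's five convolution layers lowered to GEMM via im2col. The gemm_nn shapes are the forward passes, the gemm_nt shapes the gradients with respect to the layer inputs (conv1 needs none, which is presumably why that list has only four entries), and the gemm_tn shapes the gradients with respect to the filters. A sketch of the derivation, assuming the layer geometry of the original AlexNet paper; conv2/4/5 use two groups, so the channel counts below are per group:

    # Forward conv as C[M x N] = A[M x K] * B[K x N] after im2col:
    #   M = H_out * W_out, N = output channels, K = kH * kW * input channels.
    layers = [
        # name,   H_out, kernel, cin/group, cout/group
        ('conv1', 55,    11,       3,        96),
        ('conv2', 27,     5,      48,       128),
        ('conv3', 13,     3,     256,       384),
        ('conv4', 13,     3,     192,       192),
        ('conv5', 13,     3,     192,       128),
    ]
    for name, h, k, cin, cout in layers:
        M, N, K = h * h, cout, k * k * cin
        print(name, 'NN, fprop:          ', (M, N, K))  # conv1 -> (3025, 96, 363)
        print(name, 'NT, bprop to data:  ', (M, K, N))  # conv2 -> (729, 1200, 128)
        print(name, 'TN, bprop to filter:', (K, N, M))  # conv1 -> (363, 96, 3025)

Every triple in the patch appears in this enumeration, with only conv1's NT shape left out, as noted above.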