Tuning: Added AlexNet sizes for GEMM
@@ -51,8 +51,8 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
   if(p_.A_fetching_policy!=FETCH_FROM_LOCAL || p_.B_fetching_policy!=FETCH_FROM_LOCAL)
     throw operation_not_supported_exception("Only local memory is supported for GEMM");
 
-  if(p_.depth > 1 && M*N*p_.depth > 2e6)
-    throw operation_not_supported_exception("This would necessitate a temporary larger than 1MB");
+//  if(p_.depth > 1 && M*N*p_.depth > 2e6)
+//    throw operation_not_supported_exception("This would necessitate a temporary larger than 1MB");
 
   if ((p_.mS % p_.simd_width) > 0 || (p_.nS % p_.simd_width) > 0)
     return TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE;
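For context on the guard disabled above: with split-K enabled (p_.depth > 1), the kernel reduces depth partial products through an M x N x depth temporary buffer. A back-of-envelope sketch of its footprint, assuming float32 accumulators (the helper below is illustrative, not part of the codebase):

    # Rough footprint of the split-K partial-result buffer that the
    # commented-out guard was bounding. 4-byte elements are an assumption;
    # the real size depends on the kernel's accumulator dtype.
    def splitk_temporary_bytes(M, N, depth, elem_size=4):
        return M * N * depth * elem_size

    # The 2e6 cutoff counts elements, e.g. M = N = 1024, depth = 2:
    print(splitk_temporary_bytes(1024, 1024, 2))  # 8388608 bytes, ~8 MB

Note that 2e6 elements at 4 bytes each is closer to 8 MB than to the 1 MB quoted in the exception message, so the threshold and the message already disagreed before the check was commented out.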
@@ -31,9 +31,10 @@ def train(X, Y, profiles):
     Y = Y[p,:]
 
     #Train the model
-    cut = int(0.9*M)
-    XTr, YTr = X[:cut,:], Y[:cut,:]
-    XCv, YCv = X[cut:,:], Y[cut:,:]
+    cut = int(1.00*M)
+    CV = .1
+    XTr, YTr = X[:,:], Y[:,:]
+    XCv, YCv = X[:max(1,CV*M),:], Y[:max(1,CV*M),:]
 
     nrmses = {}
     for N in range(1,min(M+1,20)):
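A note on the new split: the old code held out the last 10% of the shuffled rows for cross-validation, whereas the new code trains on all M rows and validates on the first max(1, CV*M) of those same rows, so the reported error becomes an in-sample estimate. A minimal sketch of the new semantics (shapes invented; the int() cast is added because recent NumPy rejects float slice bounds like CV*M):

    import numpy as np

    # Sketch of the new train/validation split; M and the column counts
    # are made up for illustration.
    M = 50
    X, Y = np.random.rand(M, 8), np.random.rand(M, 3)

    CV = .1
    XTr, YTr = X[:, :], Y[:, :]          # train on every row
    n_cv = int(max(1, CV * M))           # at least one validation row
    XCv, YCv = X[:n_cv, :], Y[:n_cv, :]  # a prefix of the training rows

    assert XCv.shape == (5, 8)           # 10% of 50 rows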
tune/tune.py (24 changed lines)
@@ -30,9 +30,28 @@ def tune(device, operation, json_path):
         sizes[isc.templates.gemm_tn] = sizes[isc.templates.gemm_nn]
         sizes[isc.templates.gemm_nt] = sizes[isc.templates.gemm_nn]
         sizes[isc.templates.gemm_tt] = sizes[isc.templates.gemm_nn]
 
+        #AlexNet sizes
+        sizes[isc.templates.gemm_nn] = [(3025,96,363),
+                                        (729,128,1200),
+                                        (169,384,2304),
+                                        (169,192,1728),
+                                        (169,128,1728)]
+
+        sizes[isc.templates.gemm_nt] = [(169,1728,128),
+                                        (169,1728,192),
+                                        (169,2304,384),
+                                        (729,1200,128)]
+
+        sizes[isc.templates.gemm_tn] = [(1728,128,169),
+                                        (1728,192,169),
+                                        (2304,384,169),
+                                        (1200,128,729),
+                                        (363,96,3025)]
+
     sizes = unique(list(sizes[operation]))
     sizes = [x for x in sizes if 1e-4 <= tools.memory_footprint(operation, x) <= 1e-1]
+    sizes = [(1536,1536,1536)]
 
 
     #Training data
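The new gemm_nn triples line up with the im2col lowering of AlexNet's forward convolutions, counting grouped layers per group: (M, N, K) = (output positions, filters, patch size). A sketch of the correspondence, using the standard AlexNet layer parameters (conv_as_gemm is an illustrative helper, not part of the codebase):

    # (M, N, K) of the GEMM equivalent to a convolution lowered via im2col:
    # one row per output position, one column per filter, one inner-product
    # term per patch element.
    def conv_as_gemm(out_h, out_w, n_filters, k_h, k_w, in_ch):
        return (out_h * out_w, n_filters, k_h * k_w * in_ch)

    print(conv_as_gemm(55, 55,  96, 11, 11,   3))  # conv1 -> (3025, 96, 363)
    print(conv_as_gemm(27, 27, 128,  5,  5,  48))  # conv2, per group -> (729, 128, 1200)
    print(conv_as_gemm(13, 13, 384,  3,  3, 256))  # conv3 -> (169, 384, 2304)
    print(conv_as_gemm(13, 13, 192,  3,  3, 192))  # conv4, per group -> (169, 192, 1728)
    print(conv_as_gemm(13, 13, 128,  3,  3, 192))  # conv5, per group -> (169, 128, 1728)

The gemm_nt and gemm_tn lists repeat the same layers with the dimensions permuted, consistent with the data- and weight-gradient GEMMs of the backward pass. Note also that the unconditional sizes = [(1536,1536,1536)] a few lines later overrides all of these lists for the actual run; it reads like a leftover debugging line.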
@@ -58,7 +77,8 @@ def tune(device, operation, json_path):
         best = (-predperf).argsort()[:5]
         perf = [performance(x, tools.benchmark(operation, profiles[b], tree)) for b in best]
         predicted = profiles[best[argmax(perf)]]
-        tune = not optimize.is_local_optimum(predicted, operation, x, context)
+        #tune = not optimize.is_local_optimum(predicted, operation, x, context)
+        tune = True
         #Retune if necessary
         if tune:
             #new = optimize.exhaustive(operation, x, context)
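Forcing tune = True disables the shortcut that skipped sizes whose best predicted profile already sat at a local optimum, so every size in the lists above now goes through the full retuning branch; presumably the intent is to collect fresh profiles for the newly added AlexNet shapes even where the existing model already predicts well.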