Tuning: Added AlexNet sizes for GEMM
This commit is contained in:
@@ -51,8 +51,8 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
|
|||||||
if(p_.A_fetching_policy!=FETCH_FROM_LOCAL || p_.B_fetching_policy!=FETCH_FROM_LOCAL)
|
if(p_.A_fetching_policy!=FETCH_FROM_LOCAL || p_.B_fetching_policy!=FETCH_FROM_LOCAL)
|
||||||
throw operation_not_supported_exception("Only local memory is supported for GEMM");
|
throw operation_not_supported_exception("Only local memory is supported for GEMM");
|
||||||
|
|
||||||
if(p_.depth > 1 && M*N*p_.depth > 2e6)
|
// if(p_.depth > 1 && M*N*p_.depth > 2e6)
|
||||||
throw operation_not_supported_exception("This would necessitate a temporary larger than 1MB");
|
// throw operation_not_supported_exception("This would necessitate a temporary larger than 1MB");
|
||||||
|
|
||||||
if ((p_.mS % p_.simd_width) > 0 || (p_.nS % p_.simd_width) > 0)
|
if ((p_.mS % p_.simd_width) > 0 || (p_.nS % p_.simd_width) > 0)
|
||||||
return TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE;
|
return TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE;
|
||||||
|
@@ -31,9 +31,10 @@ def train(X, Y, profiles):
|
|||||||
Y = Y[p,:]
|
Y = Y[p,:]
|
||||||
|
|
||||||
#Train the model
|
#Train the model
|
||||||
cut = int(0.9*M)
|
cut = int(1.00*M)
|
||||||
XTr, YTr = X[:cut,:], Y[:cut,:]
|
CV = .1
|
||||||
XCv, YCv = X[cut:,:], Y[cut:,:]
|
XTr, YTr = X[:,:], Y[:,:]
|
||||||
|
XCv, YCv = X[:max(1,CV*M),:], Y[:max(1,CV*M),:]
|
||||||
|
|
||||||
nrmses = {}
|
nrmses = {}
|
||||||
for N in range(1,min(M+1,20)):
|
for N in range(1,min(M+1,20)):
|
||||||
|
24
tune/tune.py
24
tune/tune.py
@@ -30,9 +30,28 @@ def tune(device, operation, json_path):
|
|||||||
sizes[isc.templates.gemm_tn] = sizes[isc.templates.gemm_nn]
|
sizes[isc.templates.gemm_tn] = sizes[isc.templates.gemm_nn]
|
||||||
sizes[isc.templates.gemm_nt] = sizes[isc.templates.gemm_nn]
|
sizes[isc.templates.gemm_nt] = sizes[isc.templates.gemm_nn]
|
||||||
sizes[isc.templates.gemm_tt] = sizes[isc.templates.gemm_nn]
|
sizes[isc.templates.gemm_tt] = sizes[isc.templates.gemm_nn]
|
||||||
|
|
||||||
|
#AlexNet sizes
|
||||||
|
sizes[isc.templates.gemm_nn] = [(3025,96,363),
|
||||||
|
(729,128,1200),
|
||||||
|
(169,384,2304),
|
||||||
|
(169,192,1728),
|
||||||
|
(169,128,1728)]
|
||||||
|
|
||||||
|
sizes[isc.templates.gemm_nt] = [(169,1728,128),
|
||||||
|
(169,1728,192),
|
||||||
|
(169,2304,384),
|
||||||
|
(729,1200,128)]
|
||||||
|
|
||||||
|
sizes[isc.templates.gemm_tn] = [(1728,128,169),
|
||||||
|
(1728,192,169),
|
||||||
|
(2304,384,169),
|
||||||
|
(1200,128,729),
|
||||||
|
(363,96,3025)]
|
||||||
|
|
||||||
|
|
||||||
sizes = unique(list(sizes[operation]))
|
sizes = unique(list(sizes[operation]))
|
||||||
sizes = [x for x in sizes if 1e-4 <= tools.memory_footprint(operation, x) <= 1e-1]
|
sizes = [x for x in sizes if 1e-4 <= tools.memory_footprint(operation, x) <= 1e-1]
|
||||||
sizes = [(1536,1536,1536)]
|
|
||||||
|
|
||||||
|
|
||||||
#Training data
|
#Training data
|
||||||
@@ -58,7 +77,8 @@ def tune(device, operation, json_path):
|
|||||||
best = (-predperf).argsort()[:5]
|
best = (-predperf).argsort()[:5]
|
||||||
perf = [performance(x, tools.benchmark(operation, profiles[b], tree)) for b in best]
|
perf = [performance(x, tools.benchmark(operation, profiles[b], tree)) for b in best]
|
||||||
predicted = profiles[best[argmax(perf)]]
|
predicted = profiles[best[argmax(perf)]]
|
||||||
tune = not optimize.is_local_optimum(predicted, operation, x, context)
|
#tune = not optimize.is_local_optimum(predicted, operation, x, context)
|
||||||
|
tune = True
|
||||||
#Retune if necessary
|
#Retune if necessary
|
||||||
if tune:
|
if tune:
|
||||||
#new = optimize.exhaustive(operation, x, context)
|
#new = optimize.exhaustive(operation, x, context)
|
||||||
|
Reference in New Issue
Block a user