Tuning: Bugfixes and loosened local minima check

This commit is contained in:
Philippe Tillet
2015-07-12 23:19:00 -07:00
parent 1257dda310
commit e2f45f55f3
2 changed files with 16 additions and 11 deletions

View File

@@ -159,16 +159,16 @@ def is_local_optimum(parameters, template, sizes, context):
tree, _ = tools.tree_of(template, sizes, context)
genetic_infos = tools.genetic_infos_of(template)
if issubclass(template, isc.axpy):
if issubclass(template, isc.templates.axpy):
sweep_over = [0,1,2]
elif issubclass(template, isc.dot):
elif issubclass(template, isc.templates.dot):
sweep_over = [0,1,2]
elif issubclass(template, isc.ger):
elif issubclass(template, isc.templates.ger):
sweep_over = [0,1,2,3,4]
elif issubclass(template, isc.gemv):
elif issubclass(template, isc.templates.gemv):
sweep_over = [0,1,2,3,4]
elif issubclass(template, isc.gemm):
sweep_over = [1,2,3,4,5,7,10,11]
elif issubclass(template, isc.templates.gemm):
sweep_over = [1,3,5,7]
#Evaluate the provided parameters guess
try:

View File

@@ -26,8 +26,7 @@ def tune(device, operation, json_path):
sizes[isc.templates.axpy] = [(x,) for x in tools.expspace(1e3, 1e7, 4)]
sizes[isc.templates.gemv_n] = product(pow2range(4,17), pow2range(4,17))
sizes[isc.templates.gemv_t] = sizes[isc.templates.gemv_n]
sizes[isc.templates.gemm_nn] = product(pow2range(5, 10), pow2range(5, 10), pow2range(5, 10))
sizes[isc.templates.gemm_nn] = [(128, 169, 1728)]
sizes[isc.templates.gemm_nn] = product(pow2range(6, 12), pow2range(6, 12), pow2range(6, 12))
sizes[isc.templates.gemm_tn] = sizes[isc.templates.gemm_nn]
sizes[isc.templates.gemm_nt] = sizes[isc.templates.gemm_nn]
sizes[isc.templates.gemm_tt] = sizes[isc.templates.gemm_nn]
@@ -68,14 +67,20 @@ def tune(device, operation, json_path):
if idx > 0:
for xx,yy in zip(X, Y):
_tree, _operands = tools.tree_of(operation, xx, context)
time = tools.benchmark(operation, new, _tree)
perf = performance(xx, time)
try:
time = tools.benchmark(operation, new, _tree)
perf = performance(xx, time)
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
perf = 0
yy.append(0 if isinf(perf) else perf)
#Update dataset
y = []
fastest = max(predperf) if nparams > 1 else None
for ip, p in enumerate(profiles):
perf = 0 if fastest and ip < nparams and predperf[ip]/fastest < .1 else performance(x,tools.benchmark(operation, p, tree))
try:
perf = 0 if fastest and ip < nparams and predperf[ip]/fastest < .1 else performance(x,tools.benchmark(operation, p, tree))
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
perf = 0
y.append(0 if isinf(perf) else perf)
X.append(x)
Y.append(y)