Tuner: made the auto-tuner compatible with the new Python API
This commit is contained in:
@@ -30,7 +30,7 @@ def train(X, Y, profiles):
|
|||||||
X = X[p,:]
|
X = X[p,:]
|
||||||
Y = Y[p,:]
|
Y = Y[p,:]
|
||||||
|
|
||||||
#Train the model
|
#Train the model
|
||||||
cut = int(1.00*M)
|
cut = int(1.00*M)
|
||||||
CV = .1
|
CV = .1
|
||||||
XTr, YTr = X[:,:], Y[:,:]
|
XTr, YTr = X[:,:], Y[:,:]
|
||||||
|
@@ -14,10 +14,10 @@ from numpy import cumsum
|
|||||||
|
|
||||||
import tools
|
import tools
|
||||||
|
|
||||||
fetch_types = [isc.templates.fetching_policy_type.FETCH_FROM_GLOBAL_CONTIGUOUS,
|
fetch_types = [isc.templates.FETCH_FROM_GLOBAL_CONTIGUOUS,
|
||||||
isc.templates.fetching_policy_type.FETCH_FROM_GLOBAL_STRIDED,
|
isc.templates.FETCH_FROM_GLOBAL_STRIDED,
|
||||||
isc.templates.fetching_policy_type.FETCH_FROM_LOCAL,
|
isc.templates.FETCH_FROM_LOCAL,
|
||||||
isc.templates.fetching_policy_type.FETCH_FROM_LOCAL]
|
isc.templates.FETCH_FROM_LOCAL]
|
||||||
|
|
||||||
def exhaustive(template, sizes, context):
|
def exhaustive(template, sizes, context):
|
||||||
tree, _ = tools.tree_of(template, sizes, context)
|
tree, _ = tools.tree_of(template, sizes, context)
|
||||||
|
@@ -21,13 +21,13 @@ def expspace(a,b,N,r=128):
|
|||||||
|
|
||||||
def benchmark(template, setting, tree):
|
def benchmark(template, setting, tree):
|
||||||
queue = tree.context.queues[0]
|
queue = tree.context.queues[0]
|
||||||
queue.models[template, isc.float32] = isc.model(isc.float32, template(*setting), queue)
|
queue.profiles[template, isc.float32] = isc.profile(template(*setting), isc.float32, queue)
|
||||||
times = []
|
times = []
|
||||||
total = 0
|
total = 0
|
||||||
i = 0
|
i = 0
|
||||||
while total < 1e-2:
|
while total < 1e-2:
|
||||||
#z = isc.zeros(1, 10000000, isc.float32, tree.context)
|
#z = isc.zeros(1, 10000000, isc.float32, tree.context)
|
||||||
z, events = isc.enqueue(tree)
|
z, events = isc.driver.enqueue(tree)
|
||||||
tree.context.queues[0].synchronize()
|
tree.context.queues[0].synchronize()
|
||||||
times.append(1e-9*sum([e.elapsed_time for e in events]))
|
times.append(1e-9*sum([e.elapsed_time for e in events]))
|
||||||
total += times[-1]
|
total += times[-1]
|
||||||
|
14
tune/tune.py
14
tune/tune.py
@@ -22,8 +22,8 @@ def pow2range(a, b):
|
|||||||
|
|
||||||
def tune(device, operation, json_path):
|
def tune(device, operation, json_path):
|
||||||
#List devices
|
#List devices
|
||||||
platforms = isc.get_platforms()
|
platforms = isc.driver.get_platforms()
|
||||||
context = isc.context(device)
|
context = isc.driver.context(device)
|
||||||
|
|
||||||
#List of size tuples to use
|
#List of size tuples to use
|
||||||
sizes = {}
|
sizes = {}
|
||||||
@@ -83,7 +83,7 @@ def tune(device, operation, json_path):
|
|||||||
predicted = profiles[0]
|
predicted = profiles[0]
|
||||||
else:
|
else:
|
||||||
clf = ensemble.RandomForestRegressor(min(10, idx+1), max_depth=min(10, idx+1)).fit(X, Y)
|
clf = ensemble.RandomForestRegressor(min(10, idx+1), max_depth=min(10, idx+1)).fit(X, Y)
|
||||||
#clf, nrmse = model.train(X, Y, profiles)
|
#clf, nrmse = profile.train(X, Y, profiles)
|
||||||
predperf = clf.predict(x)[0]
|
predperf = clf.predict(x)[0]
|
||||||
best = (-predperf).argsort()[:5]
|
best = (-predperf).argsort()[:5]
|
||||||
perf = [performance(x, tools.benchmark(operation, profiles[b], tree)) for b in best]
|
perf = [performance(x, tools.benchmark(operation, profiles[b], tree)) for b in best]
|
||||||
@@ -130,7 +130,7 @@ def tune(device, operation, json_path):
|
|||||||
json_data[operation_name]['float32'] = {}
|
json_data[operation_name]['float32'] = {}
|
||||||
D = json_data[operation_name]['float32']
|
D = json_data[operation_name]['float32']
|
||||||
if len(profiles) > 1:
|
if len(profiles) > 1:
|
||||||
clf, nrmse = model.train(X, Y, profiles)
|
clf, nrmse = profile.train(X, Y, profiles)
|
||||||
D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
|
D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
|
||||||
'children_right': e.tree_.children_right.tolist(),
|
'children_right': e.tree_.children_right.tolist(),
|
||||||
'threshold': e.tree_.threshold.astype('float64').tolist(),
|
'threshold': e.tree_.threshold.astype('float64').tolist(),
|
||||||
@@ -141,7 +141,7 @@ def tune(device, operation, json_path):
|
|||||||
|
|
||||||
|
|
||||||
def parse_arguments():
|
def parse_arguments():
|
||||||
platforms = isc.get_platforms()
|
platforms = isc.driver.get_platforms()
|
||||||
devices = [d for platform in platforms for d in platform.get_devices()]
|
devices = [d for platform in platforms for d in platform.get_devices()]
|
||||||
#Command line arguments
|
#Command line arguments
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
@@ -156,7 +156,7 @@ def parse_arguments():
|
|||||||
print("----------------")
|
print("----------------")
|
||||||
for (i, d) in enumerate(devices):
|
for (i, d) in enumerate(devices):
|
||||||
selected = '[' + ('x' if device==d else ' ') + ']'
|
selected = '[' + ('x' if device==d else ' ') + ']'
|
||||||
print selected , '-', isc.device_type_to_string(d.type), '-', d.name, 'on', d.platform.name
|
print selected , '-', isc.driver.device_type_to_string(d.type), '-', d.name, 'on', d.platform.name
|
||||||
print("----------------")
|
print("----------------")
|
||||||
|
|
||||||
|
|
||||||
@@ -169,7 +169,7 @@ def parse_arguments():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
isc.state.queue_properties = isc.CL_QUEUE_PROFILING_ENABLE
|
isc.driver.default.queue_properties = isc.driver.PROFILING_ENABLE
|
||||||
args = parse_arguments()
|
args = parse_arguments()
|
||||||
tune(*args)
|
tune(*args)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user