diff --git a/python/src/bind/common.hpp b/python/src/bind/common.hpp index fe7ecb812..7dbc0e75a 100644 --- a/python/src/bind/common.hpp +++ b/python/src/bind/common.hpp @@ -72,7 +72,7 @@ namespace tools if(name=="elementwise_1d") return sc::AXPY_TYPE; else if(name=="elementwise_2d") return sc::GER_TYPE; - else if(name=="dot") return sc::DOT_TYPE; + else if(name=="reduce_1d") return sc::DOT_TYPE; else if(name=="reduce_2d_rows") return sc::GEMV_N_TYPE; else if(name=="reduce_2d_cols") return sc::GEMV_T_TYPE; else if(name=="matrix_product_nn") return sc::GEMM_NN_TYPE; diff --git a/python/src/bind/kernels.cpp b/python/src/bind/kernels.cpp index d55b2d017..74cb192ae 100644 --- a/python/src/bind/kernels.cpp +++ b/python/src/bind/kernels.cpp @@ -1,7 +1,7 @@ #include "isaac/kernels/templates/elementwise_1d.h" #include "isaac/kernels/templates/elementwise_2d.h" -#include "isaac/kernels/templates/dot.h" -#include "isaac/kernels/templates/gemv.h" +#include "isaac/kernels/templates/reduce_1d.h" +#include "isaac/kernels/templates/reduce_2d.h" #include "isaac/kernels/templates/matrix_product.h" #include "common.hpp" @@ -58,15 +58,15 @@ void export_templates() //Vector AXPY WRAP_SINGLE_TEMPLATE(elementwise_1d, uint, uint, uint, tpt::fetching_policy_type) WRAP_SINGLE_TEMPLATE(elementwise_2d, uint, uint, uint, uint, uint, tpt::fetching_policy_type) - WRAP_SINGLE_TEMPLATE(dot, uint, uint, uint, tpt::fetching_policy_type) - WRAP_BASE(gemv) - WRAP_TEMPLATE(reduce_2d_rows, gemv, uint, uint, uint, uint, uint, tpt::fetching_policy_type) - WRAP_TEMPLATE(reduce_2d_cols, gemv, uint, uint, uint, uint, uint, tpt::fetching_policy_type) + WRAP_SINGLE_TEMPLATE(reduce_1d, uint, uint, uint, tpt::fetching_policy_type) + WRAP_BASE(reduce_2d) + WRAP_TEMPLATE(reduce_2d_rows, reduce_2d, uint, uint, uint, uint, uint, tpt::fetching_policy_type) + WRAP_TEMPLATE(reduce_2d_cols, reduce_2d, uint, uint, uint, uint, uint, tpt::fetching_policy_type) WRAP_BASE(matrix_product) - WRAP_TEMPLATE(matrix_product_nn, gemm, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint) - WRAP_TEMPLATE(matrix_product_tn, gemm, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint) - WRAP_TEMPLATE(matrix_product_nt, gemm, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint) - WRAP_TEMPLATE(matrix_product_tt, gemm, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint) + WRAP_TEMPLATE(matrix_product_nn, matrix_product, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint) + WRAP_TEMPLATE(matrix_product_tn, matrix_product, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint) + WRAP_TEMPLATE(matrix_product_nt, matrix_product, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint) + WRAP_TEMPLATE(matrix_product_tt, matrix_product, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint) } diff --git a/tune/android/main.py b/tune/android/main.py index 12e3daa56..1b3553b28 100644 --- a/tune/android/main.py +++ b/tune/android/main.py @@ -10,7 +10,7 @@ from tune.tune import Tuner from tune.tools import metric_name_of #Kivy -from kivy.logelementwise_2d import Logger +from kivy.logger import Logger from kivy.uix.scrollview import ScrollView from kivy.uix.boxlayout import BoxLayout from kivy.uix.label import Label @@ -122,7 +122,7 @@ class IsaacApp(App): self.show_tune() #Logelementwise_2d - self.logelementwise_2d = LabelLogger(self.screens['Tune'].ids.out) + self.logger = LabelLogger(self.screens['Tune'].ids.out) if on_android: @run_on_ui_thread @@ -145,8 +145,8 @@ class IsaacApp(App): device = next(x for x in self.isaac_handler.devices if x.name==self.config.get('hardware', 'device')) #FIXME: Move profiling logics into tuning sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE - self.logelementwise_2d.info('Using ' + device.name) - self.logelementwise_2d.info('') + self.logger.info('Using ' + device.name) + self.logger.info('') def run(): if on_android: @@ -156,8 +156,8 @@ class IsaacApp(App): ('blas3', (sc.templates.matrix_product_nn, sc.templates.gemm_tn, sc.templates.gemm_nt, sc.templates.gemm_tt))] for opclass, optype in operations: for op in optype: - progress_bar = LabelProgressBar(10, self.logelementwise_2d.label, metric_name_of(op)) - tuner = Tuner(self.logelementwise_2d, device, op, json_path='', progress_bar=progress_bar) + progress_bar = LabelProgressBar(10, self.logger.label, metric_name_of(op)) + tuner = Tuner(self.logger, device, op, json_path='', progress_bar=progress_bar) tuner.run(self.config.get('autotuning', opclass).lower()) tid = thread.start_new_thread(run, ()) diff --git a/tune/android/tune/optimize.py b/tune/android/tune/optimize.py index 32b14ffdc..a4c169f4d 100644 --- a/tune/android/tune/optimize.py +++ b/tune/android/tune/optimize.py @@ -44,8 +44,8 @@ def exhaustive(template, sizes, context): class GeneticOptimizer: - def __init__(self, logelementwise_2d, naccept=500, niter=1000, cxpb=.4, mutpb=.4, popsize=10, progress_bar = None): - self.logelementwise_2d = logger + def __init__(self, logger, naccept=500, niter=1000, cxpb=.4, mutpb=.4, popsize=10, progress_bar = None): + self.logger = logger self.naccept = naccept self.niter = niter self.cxpb = cxpb diff --git a/tune/android/tune/tools.py b/tune/android/tune/tools.py index 7b4e4d026..85e4e84ec 100644 --- a/tune/android/tune/tools.py +++ b/tune/android/tune/tools.py @@ -46,27 +46,27 @@ def tree_of(template, sizes, context): N, = sizes x = sc.empty(N, context=context) y = sc.empty(N, context=context) - return sc.reduce_1d(x, y), (x, y) + return sc.dot(x, y), (x, y) elif issubclass(template, sc.templates.elementwise_2d): M, N = sizes A = sc.empty((M,N), context=context) B = sc.empty((M,N), context=context) return A + B, (A, B) - elif issubclass(template, sc.templates.gemv): + elif issubclass(template, sc.templates.reduce_2d): T = template is sc.templates.reduce_2d_cols M, N = sizes[::-1] if T else sizes A = sc.empty((M,N), context=context) x = sc.empty(N, context=context) - return sc.reduce_1d(A.T, x) if T else sc.dot(A, x), (A, x) + return sc.dot(A.T, x) if T else sc.dot(A, x), (A, x) elif issubclass(template, sc.templates.matrix_product): - AT = template is sc.templates.matrix_product_tn or template is sc.templates.gemm_tt - BT = template is sc.templates.matrix_product_nt or template is sc.templates.gemm_tt + AT = template is sc.templates.matrix_product_tn or template is sc.templates.matrix_product_tt + BT = template is sc.templates.matrix_product_nt or template is sc.templates.matrix_product_tt M, N, K = sizes A = sc.empty((K, M) if AT else (M, K), context=context) B = sc.empty((N, K) if BT else (K, N), context=context) AA = A.T if AT else A BB = B.T if BT else B - return sc.reduce_1d(AA, BB), (A, B) + return sc.dot(AA, BB), (A, B) def memory_footprint(template, sizes): if issubclass(template, sc.templates.elementwise_1d): @@ -75,13 +75,13 @@ def memory_footprint(template, sizes): return 4*2*sizes[0]*1e-9 elif issubclass(template, sc.templates.elementwise_2d): return 4*sizes[0]*sizes[1]*1e-9 - elif issubclass(template, sc.templates.gemv): + elif issubclass(template, sc.templates.reduce_2d): return 4*sizes[0]*sizes[1]*1e-9 elif issubclass(template, sc.templates.matrix_product): return 4*(sizes[0]*sizes[1] + sizes[0]*sizes[2] + sizes[1]*sizes[2])*1e-9 def metric_of(template): - memory_bound = [sc.templates.elementwise_1d, sc.templates.reduce_1d, sc.templates.elementwise_2d, sc.templates.gemv] + memory_bound = [sc.templates.elementwise_1d, sc.templates.reduce_1d, sc.templates.elementwise_2d, sc.templates.reduce_2d] compute_bound = [sc.templates.matrix_product] if any([issubclass(template, x) for x in memory_bound]): return lambda sizes, t: memory_footprint(template, sizes)/t @@ -100,7 +100,7 @@ def genetic_infos_of(template): return {'categorical': [3], 'nbits':[3,4,4,2]} elif issubclass(template, sc.templates.elementwise_2d): return {'categorical': [5], 'nbits': [3,3,3,3,4,2]} - elif issubclass(template, sc.templates.gemv): + elif issubclass(template, sc.templates.reduce_2d): return {'categorical': [5], 'nbits': [3,3,3,3,4,2]} elif issubclass(template, sc.templates.matrix_product): return {'categorical': [8,9], 'nbits': [3,3,3,3,3,2,2,2,2,2,3,3]} diff --git a/tune/android/tune/tune.py b/tune/android/tune/tune.py index 152e1c879..707147b66 100644 --- a/tune/android/tune/tune.py +++ b/tune/android/tune/tune.py @@ -26,8 +26,8 @@ def pow2range(a, b): class Tuner: - def __init__(self, logelementwise_2d, device, operation, json_path, progress_bar): - self.logelementwise_2d = logger + def __init__(self, logger, device, operation, json_path, progress_bar): + self.logger = logger self.device = device self.operation = operation self.json_path = json_path @@ -42,10 +42,10 @@ class Tuner: operation = self.operation context = sc.driver.context(device) - if self.logelementwise_2d: - self.logelementwise_2d.info("----------------") - self.logelementwise_2d.info(operation.__name__.replace('_','-').upper()) - self.logelementwise_2d.info("----------------") + if self.logger: + self.logger.info("----------------") + self.logger.info(operation.__name__.replace('_','-').upper()) + self.logger.info("----------------") #BLAS1 training sizes if operation in [sc.templates.elementwise_1d, sc.templates.reduce_1d]: @@ -75,7 +75,7 @@ class Tuner: sizes = product(pow2range(4,17), pow2range(4,17)) #BLAS3 training sizes - if operation in [sc.templates.matrix_product_nn, sc.templates.gemm_nt, sc.templates.gemm_tn, sc.templates.gemm_tt]: + if operation in [sc.templates.matrix_product_nn, sc.templates.matrix_product_nt, sc.templates.matrix_product_tn, sc.templates.matrix_product_tt]: if level=='simple': sizes = [(1536,1536,1536)] elif level=='intermediate': @@ -171,7 +171,7 @@ class Tuner: #Retune if necessary if retune: - optimizer = optimize.GeneticOptimizer(self.logelementwise_2d, naccept=1000, niter=1000, cxpb=.4, mutpb=.4, popsize=20, progress_bar = self.progress_bar) + optimizer = optimize.GeneticOptimizer(self.logger, naccept=1000, niter=1000, cxpb=.4, mutpb=.4, popsize=20, progress_bar = self.progress_bar) new = optimizer.run(operation, x, context, prior=predicted)[0] if new not in profiles: profiles.append(new) diff --git a/tune/main.py b/tune/main.py index d3541f065..de76055e7 100644 --- a/tune/main.py +++ b/tune/main.py @@ -6,22 +6,23 @@ from tune.tools import metric_name_of def parse_arguments(): platforms = sc.driver.get_platforms() devices = [d for platform in platforms for d in platform.get_devices()] + #Command line arguments parser = argparse.ArgumentParser() parser.add_argument("-d", "--device", default=0, type=int, help='Device to tune for') parser.add_argument("-j", "--json", default='', type=str) - parser.add_argument('--elementwise_1d', action='store_true', help='Tune AXPY') - parser.add_argument('--reduce_1d', action='store_true', help='Tune DOT') - parser.add_argument('--elementwise_2d', action='store_true', help='Tune GER') - parser.add_argument('--reduce_2d_rows', action='store_true', help='Tune GEMV-N') - parser.add_argument('--reduce_2d_cols', action='store_true', help='Tune GEMV-T') - parser.add_argument('--matrix_product_nn', action='store_true', help='Tune GEMM-NN') - parser.add_argument('--matrix_product_tn', action='store_true', help='Tune GEMM-TN') - parser.add_argument('--matrix_product_nt', action='store_true', help='Tune GEMM-NT') - parser.add_argument('--matrix_product_tt', action='store_true', help='Tune GEMM-TT') - + parser.add_argument('--elementwise_1d', action='store_true', help='Tune ELEMENTWISE [1D]') + parser.add_argument('--elementwise_2d', action='store_true', help='Tune ELEMENTWISE [2D]') + parser.add_argument('--reduce_1d', action='store_true', help='Tune REDUCE [1D]') + parser.add_argument('--reduce_2d_rows', action='store_true', help='Tune REDUCE [2D/rows]') + parser.add_argument('--reduce_2d_cols', action='store_true', help='Tune REDUCE [2D/cols]') + parser.add_argument('--matrix_product_nn', action='store_true', help='Tune MATRIX PRODUCT [NN]') + parser.add_argument('--matrix_product_tn', action='store_true', help='Tune MATRIX PRODUCT [TN]') + parser.add_argument('--matrix_product_nt', action='store_true', help='Tune MATRIX PRODUCT [NT]') + parser.add_argument('--matrix_product_tt', action='store_true', help='Tune MATRIX PRODUCT [TT]') args = parser.parse_args() + #Device device = devices[int(args.device)] print("----------------") print("Devices available:") @@ -30,8 +31,8 @@ def parse_arguments(): selected = '[' + ('x' if device==d else ' ') + ']' print selected , '-', sc.driver.device_type_to_string(d.type), '-', d.name, 'on', d.platform.name - - operations = ['elementwise_1d', 'reduce_1d', 'elementwise_2d', 'reduce_2d_rows', 'reduce_2d_cols', 'matrix_product_nn', 'gemm_tn', 'gemm_nt', 'gemm_tt'] + #Operations + operations = ['elementwise_1d', 'reduce_1d', 'elementwise_2d', 'reduce_2d_rows', 'reduce_2d_cols', 'matrix_product_nn', 'matrix_product_tn', 'matrix_product_nt', 'matrix_product_tt'] operations = [getattr(sc.templates,op) for op in operations if getattr(args, op)] return (device, operations, args.json) @@ -63,16 +64,16 @@ class ProgressBar: sys.stdout.flush() if __name__ == "__main__": - logelementwise_2d = logging.getLogger(__name__) + logger = logging.getLogger(__name__) sh = logging.StreamHandler(sys.stdout) sh.setFormatter(logging.Formatter('%(message)s')) sh.setLevel(logging.INFO) - logelementwise_2d.addHandler(sh) - logelementwise_2d.setLevel(logging.INFO) + logger.addHandler(sh) + logger.setLevel(logging.INFO) sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE device, operations, json = parse_arguments() for operation in operations: - tuner = Tuner(logelementwise_2d, device, operation, json, ProgressBar(30, metric_name_of(operation))) - tuner.run(level='intermediate') + tuner = Tuner(logger, device, operation, json, ProgressBar(30, metric_name_of(operation))) + tuner.run(level='simple')