Code quality: more renaming

This commit is contained in:
Philippe Tillet
2015-12-16 16:34:36 -05:00
parent 761a741731
commit 83feed534c
13 changed files with 110 additions and 110 deletions

View File

@@ -26,8 +26,8 @@ inline expression_type expression_type_from_string(std::string const & name)
if(name=="elementwise_1d") return AXPY_TYPE;
if(name=="reduce_1d") return DOT_TYPE;
if(name=="elementwise_2d") return GER_TYPE;
if(name=="reduce_2d_n") return GEMV_N_TYPE;
if(name=="reduce_2d_t") return GEMV_T_TYPE;
if(name=="reduce_2d_rows") return GEMV_N_TYPE;
if(name=="reduce_2d_cols") return GEMV_T_TYPE;
if(name=="matrix_product_nn") return GEMM_NN_TYPE;
if(name=="matrix_product_nt") return GEMM_NT_TYPE;
if(name=="matrix_product_tn") return GEMM_TN_TYPE;

View File

@@ -41,18 +41,18 @@ private:
reduce_1d_type reduce_1d_type_;
};
class reduce_2d_n : public reduce_2d
class reduce_2d_rows : public reduce_2d
{
public:
reduce_2d_n(reduce_2d::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
reduce_2d_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
reduce_2d_rows(reduce_2d::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
reduce_2d_rows(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
};
class reduce_2d_t : public reduce_2d
class reduce_2d_cols : public reduce_2d
{
public:
reduce_2d_t(reduce_2d::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
reduce_2d_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
reduce_2d_cols(reduce_2d::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
reduce_2d_cols(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
};
}

View File

@@ -406,14 +406,14 @@ void reduce_2d::enqueue(driver::CommandQueue & queue, driver::Program const & pr
control.execution_options().enqueue(program.context(), kernels[i], global[i], local[i]);
}
reduce_2d_n::reduce_2d_n(reduce_2d_parameters const & parameters,binding_policy_t binding_policy): reduce_2d(parameters, REDUCE_ROWS, binding_policy){}
reduce_2d_rows::reduce_2d_rows(reduce_2d_parameters const & parameters,binding_policy_t binding_policy): reduce_2d(parameters, REDUCE_ROWS, binding_policy){}
reduce_2d_n::reduce_2d_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
reduce_2d_rows::reduce_2d_rows(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
fetching_policy_type fetch, binding_policy_t bind): reduce_2d(reduce_2d_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_ROWS, bind) {}
reduce_2d_t::reduce_2d_t(reduce_2d::parameters_type const & parameters, binding_policy_t binding_policy): reduce_2d(parameters, REDUCE_COLUMNS, binding_policy){}
reduce_2d_cols::reduce_2d_cols(reduce_2d::parameters_type const & parameters, binding_policy_t binding_policy): reduce_2d(parameters, REDUCE_COLUMNS, binding_policy){}
reduce_2d_t::reduce_2d_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
reduce_2d_cols::reduce_2d_cols(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
fetching_policy_type fetch, binding_policy_t bind): reduce_2d(reduce_2d_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_COLUMNS, bind) {}

View File

@@ -140,10 +140,10 @@ std::shared_ptr<templates::base> profiles::create(std::string const & template_n
return std::shared_ptr<templates::base>(new templates::reduce_1d(x[0], x[1], x[2], fetch[x[3]]));
else if(template_name=="elementwise_2d")
return std::shared_ptr<templates::base>(new templates::elementwise_2d(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("reduce_2d_n")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::reduce_2d_n(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("reduce_2d_t")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::reduce_2d_t(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("reduce_2d_rows")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::reduce_2d_rows(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("reduce_2d_cols")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::reduce_2d_cols(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("matrix_product_nn")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::matrix_product_nn(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
else if(template_name.find("matrix_product_tn")!=std::string::npos)
@@ -163,7 +163,7 @@ void profiles::import(std::string const & str, driver::CommandQueue const & queu
rapidjson::Document document;
document.Parse<0>(str.c_str());
//Deserialize
std::vector<std::string> operations = {"elementwise_1d", "reduce_1d", "elementwise_2d", "reduce_2d_n", "reduce_2d_t", "matrix_product_nn", "matrix_product_tn", "matrix_product_nt", "matrix_product_tt"};
std::vector<std::string> operations = {"elementwise_1d", "reduce_1d", "elementwise_2d", "reduce_2d_rows", "reduce_2d_cols", "matrix_product_nn", "matrix_product_tn", "matrix_product_nt", "matrix_product_tt"};
std::vector<std::string> dtype = {"float32", "float64"};
for(auto & operation : operations)
{
@@ -268,8 +268,8 @@ std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<templates::ba
res[std::make_pair(AXPY_TYPE, DTYPE)] = ptr_t (new templates::elementwise_1d(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(DOT_TYPE, DTYPE)] = ptr_t(new templates::reduce_1d(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GER_TYPE, DTYPE)] = ptr_t(new templates::elementwise_2d(1,128,1,16,32,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_N_TYPE, DTYPE)] = ptr_t(new templates::reduce_2d_n(1, 8, 8, 4, 16, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_T_TYPE, DTYPE)] = ptr_t(new templates::reduce_2d_t(1, 8, 8, 64, 8, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_N_TYPE, DTYPE)] = ptr_t(new templates::reduce_2d_rows(1, 8, 8, 4, 16, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_T_TYPE, DTYPE)] = ptr_t(new templates::reduce_2d_cols(1, 8, 8, 64, 8, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMM_NN_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_nn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_TN_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_tn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_NT_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_nt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));

View File

@@ -13,13 +13,13 @@ B = sc.empty((K, N), sc.float32)
queue = A.context.queues[0]
#Benchmark profile 1
queue.profiles[sc.templates.gemm_nn, sc.float32] = sc.profile(templates.gemm_nn(1,8,16,8,1,8,1,8,templates.FETCH_FROM_LOCAL,templates.FETCH_FROM_LOCAL,8,8), sc.float32, queue)
C, events = sc.driver.enqueue(sc.dot(A, B))
queue.profiles[sc.templates.matrix_product_nn, sc.float32] = sc.profile(templates.gemm_nn(1,8,16,8,1,8,1,8,templates.FETCH_FROM_LOCAL,templates.FETCH_FROM_LOCAL,8,8), sc.float32, queue)
C, events = sc.driver.enqueue(sc.reduce_1d(A, B))
C.context.synchronize()
print 'Profile 1 finished in', sum([e.elapsed_time for e in events])*1e-9, 's'
#Benchmark profile 2
queue.profiles[sc.templates.gemm_nn, sc.float32] = sc.profile(templates.gemm_nn(1,8,16,16,1,8,1,8,templates.FETCH_FROM_LOCAL,templates.FETCH_FROM_LOCAL,8,16), sc.float32, queue)
C, events = sc.driver.enqueue(sc.dot(A, B))
queue.profiles[sc.templates.matrix_product_nn, sc.float32] = sc.profile(templates.gemm_nn(1,8,16,16,1,8,1,8,templates.FETCH_FROM_LOCAL,templates.FETCH_FROM_LOCAL,8,16), sc.float32, queue)
C, events = sc.driver.enqueue(sc.reduce_1d(A, B))
C.context.synchronize()
print 'Profile 2 finished in', sum([e.elapsed_time for e in events])*1e-9, 's'

View File

@@ -73,7 +73,7 @@ def main():
libraries += ['gnustl_shared']
#Source files
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/templates/reduce_2d.cpp src/lib/kernels/templates/elementwise_2d.cpp src/lib/kernels/templates/elementwise_1d.cpp src/lib/kernels/templates/reduce_1d.cpp src/lib/kernels/templates/matrix_product.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/wrap/clBLAS.cpp src/lib/wrap/cublas.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
src = 'src/lib/exception/operation_not_supported.cpp src/lib/exception/unknown_datatype.cpp src/lib/value_scalar.cpp src/lib/driver/check.cpp src/lib/driver/ndrange.cpp src/lib/driver/platform.cpp src/lib/driver/backend.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/event.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/device.cpp src/lib/driver/program_cache.cpp src/lib/driver/buffer.cpp src/lib/driver/context.cpp src/lib/driver/dispatch.cpp src/lib/kernels/templates/reduce_1d.cpp src/lib/kernels/templates/elementwise_1d.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/elementwise_2d.cpp src/lib/kernels/templates/matrix_product.cpp src/lib/kernels/templates/reduce_2d.cpp src/lib/kernels/stream.cpp src/lib/kernels/keywords.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/binder.cpp src/lib/kernels/parse.cpp src/lib/wrap/clBLAS.cpp src/lib/wrap/cublas.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/io.cpp src/lib/symbolic/preset.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
boostsrc = 'external/boost/libs/'
for s in ['numpy','python','smart_ptr','system','thread']:
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]

View File

@@ -70,15 +70,15 @@ namespace tools
else
name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
if(name=="axpy") return sc::AXPY_TYPE;
else if(name=="ger") return sc::GER_TYPE;
if(name=="elementwise_1d") return sc::AXPY_TYPE;
else if(name=="elementwise_2d") return sc::GER_TYPE;
else if(name=="dot") return sc::DOT_TYPE;
else if(name=="gemv_n") return sc::GEMV_N_TYPE;
else if(name=="gemv_t") return sc::GEMV_T_TYPE;
else if(name=="gemm_nn") return sc::GEMM_NN_TYPE;
else if(name=="gemm_tn") return sc::GEMM_TN_TYPE;
else if(name=="gemm_nt") return sc::GEMM_NT_TYPE;
else if(name=="gemm_tt") return sc::GEMM_TT_TYPE;
else if(name=="reduce_2d_rows") return sc::GEMV_N_TYPE;
else if(name=="reduce_2d_cols") return sc::GEMV_T_TYPE;
else if(name=="matrix_product_nn") return sc::GEMM_NN_TYPE;
else if(name=="matrix_product_tn") return sc::GEMM_TN_TYPE;
else if(name=="matrix_product_nt") return sc::GEMM_NT_TYPE;
else if(name=="matrix_product_tt") return sc::GEMM_TT_TYPE;
else
{
PyErr_SetString(PyExc_TypeError, "Template type not understood");

View File

@@ -1,8 +1,8 @@
#include "isaac/kernels/templates/axpy.h"
#include "isaac/kernels/templates/ger.h"
#include "isaac/kernels/templates/elementwise_1d.h"
#include "isaac/kernels/templates/elementwise_2d.h"
#include "isaac/kernels/templates/dot.h"
#include "isaac/kernels/templates/gemv.h"
#include "isaac/kernels/templates/gemm.h"
#include "isaac/kernels/templates/matrix_product.h"
#include "common.hpp"
#include "kernels.h"
@@ -56,17 +56,17 @@ void export_templates()
#define WRAP_SINGLE_TEMPLATE(name, ...) WRAP_BASE(name) WRAP_TEMPLATE(name, name, __VA_ARGS__)
//Vector AXPY
WRAP_SINGLE_TEMPLATE(axpy, uint, uint, uint, tpt::fetching_policy_type)
WRAP_SINGLE_TEMPLATE(ger, uint, uint, uint, uint, uint, tpt::fetching_policy_type)
WRAP_SINGLE_TEMPLATE(elementwise_1d, uint, uint, uint, tpt::fetching_policy_type)
WRAP_SINGLE_TEMPLATE(elementwise_2d, uint, uint, uint, uint, uint, tpt::fetching_policy_type)
WRAP_SINGLE_TEMPLATE(dot, uint, uint, uint, tpt::fetching_policy_type)
WRAP_BASE(gemv)
WRAP_TEMPLATE(gemv_n, gemv, uint, uint, uint, uint, uint, tpt::fetching_policy_type)
WRAP_TEMPLATE(gemv_t, gemv, uint, uint, uint, uint, uint, tpt::fetching_policy_type)
WRAP_BASE(gemm)
WRAP_TEMPLATE(gemm_nn, gemm, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(gemm_tn, gemm, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(gemm_nt, gemm, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(gemm_tt, gemm, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(reduce_2d_rows, gemv, uint, uint, uint, uint, uint, tpt::fetching_policy_type)
WRAP_TEMPLATE(reduce_2d_cols, gemv, uint, uint, uint, uint, uint, tpt::fetching_policy_type)
WRAP_BASE(matrix_product)
WRAP_TEMPLATE(matrix_product_nn, gemm, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(matrix_product_tn, gemm, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(matrix_product_nt, gemm, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(matrix_product_tt, gemm, uint, uint, uint, uint, uint, uint, uint, uint, tpt::fetching_policy_type, tpt::fetching_policy_type, uint, uint)
}

View File

@@ -10,7 +10,7 @@ from tune.tune import Tuner
from tune.tools import metric_name_of
#Kivy
from kivy.logger import Logger
from kivy.logelementwise_2d import Logger
from kivy.uix.scrollview import ScrollView
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.label import Label
@@ -19,7 +19,7 @@ from kivy.properties import BooleanProperty
from kivy.app import App
from kivy.lang import Builder
from kivy.properties import NumericProperty, StringProperty, BooleanProperty,ListProperty
from kivy.uix.screenmanager import Screen
from kivy.uix.screenmanaelementwise_2d import Screen
from kivy.uix.settings import SettingsWithNoMenu
@@ -42,7 +42,7 @@ class ScrollableLabel(ScrollView):
text = StringProperty('')
font_name = StringProperty('')
class LabelLogger:
class LabelLogelementwise_2d:
def __init__(self, label):
self.label = label;
@@ -110,7 +110,7 @@ class IsaacApp(App):
self.settings_cls = SettingsWithNoMenu
self.use_kivy_settings = False
#Screen Manager
#Screen Manager
self.screen_names = ['Tune']
self.screens = {}
current_directory = dirname(realpath(__file__))
@@ -121,21 +121,21 @@ class IsaacApp(App):
#Default view
self.show_tune()
#Logger
self.logger = LabelLogger(self.screens['Tune'].ids.out)
#Logger
self.logelementwise_2d = LabelLogger(self.screens['Tune'].ids.out)
if on_android:
@run_on_ui_thread
def lock_screen(self):
    """Add FLAG_KEEP_SCREEN_ON to the window of the running Android activity."""
    # Imported locally: pyjnius is only needed on the Android code path.
    from jnius import autoclass
    PythonActivity = autoclass('org.renpy.android.PythonActivity')
    # JNI name of the nested Java class WindowManager.LayoutParams. The
    # renamed line spelled it 'WindowManaelementwise_2d', which is not an
    # Android class.
    Params = autoclass('android.view.WindowManager$LayoutParams')
    PythonActivity.mActivity.getWindow().addFlags(Params.FLAG_KEEP_SCREEN_ON)
@run_on_ui_thread
def unlock_screen(self):
    """Clear FLAG_KEEP_SCREEN_ON on the window of the running Android activity."""
    # In the visible code `autoclass` is only imported inside lock_screen,
    # so import it here as well instead of relying on an outer-scope name.
    from jnius import autoclass
    PythonActivity = autoclass('org.renpy.android.PythonActivity')
    # JNI name of the nested Java class WindowManager.LayoutParams. The
    # renamed line spelled it 'WindowManaelementwise_2d', which is not an
    # Android class.
    Params = autoclass('android.view.WindowManager$LayoutParams')
    PythonActivity.mActivity.getWindow().clearFlags(Params.FLAG_KEEP_SCREEN_ON)
def start_tuning(self):
@@ -145,19 +145,19 @@ class IsaacApp(App):
device = next(x for x in self.isaac_handler.devices if x.name==self.config.get('hardware', 'device'))
#FIXME: Move profiling logics into tuning
sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE
self.logger.info('Using ' + device.name)
self.logger.info('')
self.logelementwise_2d.info('Using ' + device.name)
self.logelementwise_2d.info('')
def run():
if on_android:
self.lock_screen()
# Templates to tune, grouped by the 'autotuning' config key that selects
# the tuning level for each group. All four matrix-product variants use
# the renamed templates; the previous line still referenced gemm_tn,
# gemm_nt and gemm_tt after the rename.
operations = [('blas1', (sc.templates.elementwise_1d,)),
              ('blas2', (sc.templates.reduce_2d_rows, sc.templates.reduce_2d_cols)),
              ('blas3', (sc.templates.matrix_product_nn, sc.templates.matrix_product_tn,
                         sc.templates.matrix_product_nt, sc.templates.matrix_product_tt))]
for opclass, optype in operations:
for op in optype:
progress_bar = LabelProgressBar(10, self.logger.label, metric_name_of(op))
tuner = Tuner(self.logger, device, op, json_path='', progress_bar=progress_bar)
progress_bar = LabelProgressBar(10, self.logelementwise_2d.label, metric_name_of(op))
tuner = Tuner(self.logelementwise_2d, device, op, json_path='', progress_bar=progress_bar)
tuner.run(self.config.get('autotuning', opclass).lower())
tid = thread.start_new_thread(run, ())

View File

@@ -44,8 +44,8 @@ def exhaustive(template, sizes, context):
class GeneticOptimizer:
def __init__(self, logger, naccept=500, niter=1000, cxpb=.4, mutpb=.4, popsize=10, progress_bar = None):
self.logger = logger
def __init__(self, logelementwise_2d, naccept=500, niter=1000, cxpb=.4, mutpb=.4, popsize=10, progress_bar = None):
self.logelementwise_2d = logger
self.naccept = naccept
self.niter = niter
self.cxpb = cxpb
@@ -164,15 +164,15 @@ def is_local_optimum(parameters, template, sizes, context):
tree, _ = tools.tree_of(template, sizes, context)
genetic_infos = tools.genetic_infos_of(template)
if issubclass(template, sc.templates.axpy):
if issubclass(template, sc.templates.elementwise_1d):
sweep_over = [0,1,2]
elif issubclass(template, sc.templates.dot):
elif issubclass(template, sc.templates.reduce_1d):
sweep_over = [0,1,2]
elif issubclass(template, sc.templates.ger):
elif issubclass(template, sc.templates.elementwise_2d):
sweep_over = [0,1,2,3,4]
elif issubclass(template, sc.templates.gemv):
sweep_over = [0,1,2,3,4]
elif issubclass(template, sc.templates.gemm):
elif issubclass(template, sc.templates.matrix_product):
sweep_over = [1,3,5,7]
#Evaluate the provided parameters guess

View File

@@ -37,72 +37,72 @@ def benchmark(template, setting, tree):
def tree_of(template, sizes, context):
    """Build the expression used to benchmark `template`.

    `sizes` is unpacked as (N,) for the 1D templates, (M, N) for the 2D
    element-wise and reduce_2d templates, and (M, N, K) for matrix products.
    Operands are allocated with sc.empty on `context`.

    Returns a pair (expression, operands), where `operands` is the tuple of
    arrays the expression was built from. Falls through (returning None) when
    `template` matches none of the known families.
    """
    if issubclass(template, sc.templates.elementwise_1d):
        N, = sizes
        x = sc.empty(N, dtype=sc.float32, context=context)
        y = sc.empty(N, dtype=sc.float32, context=context)
        return sc.assign(y, x + y), (x, y)
    elif issubclass(template, sc.templates.reduce_1d):
        N, = sizes
        x = sc.empty(N, context=context)
        y = sc.empty(N, context=context)
        # NOTE(review): the rename left `sc.reduce_1d(...)` in some branches
        # and `sc.dot(...)` in others. `sc.dot` is used throughout here, as
        # before the rename; confirm it is still the expression builder.
        return sc.dot(x, y), (x, y)
    elif issubclass(template, sc.templates.elementwise_2d):
        M, N = sizes
        A = sc.empty((M,N), context=context)
        B = sc.empty((M,N), context=context)
        return A + B, (A, B)
    elif issubclass(template, sc.templates.gemv):
        # The column-reduction variant swaps the two sizes and multiplies
        # by the transposed matrix.
        T = template is sc.templates.reduce_2d_cols
        M, N = sizes[::-1] if T else sizes
        A = sc.empty((M,N), context=context)
        x = sc.empty(N, context=context)
        # NOTE(review): x has length N in both cases while A.T is (N, M);
        # confirm the transposed case is only used with compatible sizes.
        return sc.dot(A.T, x) if T else sc.dot(A, x), (A, x)
    elif issubclass(template, sc.templates.matrix_product):
        # AT / BT: whether the left / right operand is stored transposed.
        AT = template is sc.templates.matrix_product_tn or template is sc.templates.matrix_product_tt
        BT = template is sc.templates.matrix_product_nt or template is sc.templates.matrix_product_tt
        M, N, K = sizes
        A = sc.empty((K, M) if AT else (M, K), context=context)
        B = sc.empty((N, K) if BT else (K, N), context=context)
        AA = A.T if AT else A
        BB = B.T if BT else B
        return sc.dot(AA, BB), (A, B)
def memory_footprint(template, sizes):
if issubclass(template, sc.templates.axpy):
if issubclass(template, sc.templates.elementwise_1d):
return 4*3*sizes[0]*1e-9
elif issubclass(template, sc.templates.dot):
elif issubclass(template, sc.templates.reduce_1d):
return 4*2*sizes[0]*1e-9
elif issubclass(template, sc.templates.ger):
elif issubclass(template, sc.templates.elementwise_2d):
return 4*sizes[0]*sizes[1]*1e-9
elif issubclass(template, sc.templates.gemv):
return 4*sizes[0]*sizes[1]*1e-9
elif issubclass(template, sc.templates.gemm):
elif issubclass(template, sc.templates.matrix_product):
return 4*(sizes[0]*sizes[1] + sizes[0]*sizes[2] + sizes[1]*sizes[2])*1e-9
def metric_of(template):
memory_bound = [sc.templates.axpy, sc.templates.dot, sc.templates.ger, sc.templates.gemv]
compute_bound = [sc.templates.gemm]
memory_bound = [sc.templates.elementwise_1d, sc.templates.reduce_1d, sc.templates.elementwise_2d, sc.templates.gemv]
compute_bound = [sc.templates.matrix_product]
if any([issubclass(template, x) for x in memory_bound]):
return lambda sizes, t: memory_footprint(template, sizes)/t
elif any([issubclass(template, x) for x in compute_bound]):
return lambda sizes, t: 2*sizes[0]*sizes[1]*sizes[2]*1e-9/t
def metric_name_of(template):
if issubclass(template, sc.templates.gemm):
if issubclass(template, sc.templates.matrix_product):
return 'GFLOPS'
return 'GB/S'
def genetic_infos_of(template):
if issubclass(template, sc.templates.axpy):
if issubclass(template, sc.templates.elementwise_1d):
return {'categorical': [3], 'nbits': [3,4,4,2] }
elif issubclass(template, sc.templates.dot):
elif issubclass(template, sc.templates.reduce_1d):
return {'categorical': [3], 'nbits':[3,4,4,2]}
elif issubclass(template, sc.templates.ger):
elif issubclass(template, sc.templates.elementwise_2d):
return {'categorical': [5], 'nbits': [3,3,3,3,4,2]}
elif issubclass(template, sc.templates.gemv):
return {'categorical': [5], 'nbits': [3,3,3,3,4,2]}
elif issubclass(template, sc.templates.gemm):
elif issubclass(template, sc.templates.matrix_product):
return {'categorical': [8,9], 'nbits': [3,3,3,3,3,2,2,2,2,2,3,3]}

View File

@@ -26,8 +26,8 @@ def pow2range(a, b):
class Tuner:
def __init__(self, logger, device, operation, json_path, progress_bar):
self.logger = logger
def __init__(self, logelementwise_2d, device, operation, json_path, progress_bar):
self.logelementwise_2d = logger
self.device = device
self.operation = operation
self.json_path = json_path
@@ -42,13 +42,13 @@ class Tuner:
operation = self.operation
context = sc.driver.context(device)
if self.logger:
self.logger.info("----------------")
self.logger.info(operation.__name__.replace('_','-').upper())
self.logger.info("----------------")
if self.logelementwise_2d:
self.logelementwise_2d.info("----------------")
self.logelementwise_2d.info(operation.__name__.replace('_','-').upper())
self.logelementwise_2d.info("----------------")
#BLAS1 training sizes
if operation in [sc.templates.axpy, sc.templates.dot]:
if operation in [sc.templates.elementwise_1d, sc.templates.reduce_1d]:
if level=='simple':
sizes = [(10000000,)]
elif level=='intermediate':
@@ -57,7 +57,7 @@ class Tuner:
sizes = [(x,) for x in tools.expspace(1e3, 1e8, 100)]
#BLAS2 training sizes
if operation in [sc.templates.ger, sc.templates.gemv_n, sc.templates.gemv_t]:
if operation in [sc.templates.elementwise_2d, sc.templates.reduce_2d_rows, sc.templates.reduce_2d_cols]:
if level=='simple':
sizes = [(1536, 1536)]
elif level=='intermediate':
@@ -75,7 +75,7 @@ class Tuner:
sizes = product(pow2range(4,17), pow2range(4,17))
#BLAS3 training sizes
if operation in [sc.templates.gemm_nn, sc.templates.gemm_nt, sc.templates.gemm_tn, sc.templates.gemm_tt]:
if operation in [sc.templates.matrix_product_nn, sc.templates.gemm_nt, sc.templates.gemm_tn, sc.templates.gemm_tt]:
if level=='simple':
sizes = [(1536,1536,1536)]
elif level=='intermediate':
@@ -171,7 +171,7 @@ class Tuner:
#Retune if necessary
if retune:
optimizer = optimize.GeneticOptimizer(self.logger, naccept=1000, niter=1000, cxpb=.4, mutpb=.4, popsize=20, progress_bar = self.progress_bar)
optimizer = optimize.GeneticOptimizer(self.logelementwise_2d, naccept=1000, niter=1000, cxpb=.4, mutpb=.4, popsize=20, progress_bar = self.progress_bar)
new = optimizer.run(operation, x, context, prior=predicted)[0]
if new not in profiles:
profiles.append(new)

View File

@@ -10,15 +10,15 @@ def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--device", default=0, type=int, help='Device to tune for')
parser.add_argument("-j", "--json", default='', type=str)
parser.add_argument('--axpy', action='store_true', help='Tune AXPY')
parser.add_argument('--dot', action='store_true', help='Tune DOT')
parser.add_argument('--ger', action='store_true', help='Tune GER')
parser.add_argument('--gemv_n', action='store_true', help='Tune GEMV-N')
parser.add_argument('--gemv_t', action='store_true', help='Tune GEMV-T')
parser.add_argument('--gemm_nn', action='store_true', help='Tune GEMM-NN')
parser.add_argument('--gemm_tn', action='store_true', help='Tune GEMM-TN')
parser.add_argument('--gemm_nt', action='store_true', help='Tune GEMM-NT')
parser.add_argument('--gemm_tt', action='store_true', help='Tune GEMM-TT')
parser.add_argument('--elementwise_1d', action='store_true', help='Tune AXPY')
parser.add_argument('--reduce_1d', action='store_true', help='Tune DOT')
parser.add_argument('--elementwise_2d', action='store_true', help='Tune GER')
parser.add_argument('--reduce_2d_rows', action='store_true', help='Tune GEMV-N')
parser.add_argument('--reduce_2d_cols', action='store_true', help='Tune GEMV-T')
parser.add_argument('--matrix_product_nn', action='store_true', help='Tune GEMM-NN')
parser.add_argument('--matrix_product_tn', action='store_true', help='Tune GEMM-TN')
parser.add_argument('--matrix_product_nt', action='store_true', help='Tune GEMM-NT')
parser.add_argument('--matrix_product_tt', action='store_true', help='Tune GEMM-TT')
args = parser.parse_args()
@@ -31,7 +31,7 @@ def parse_arguments():
print selected , '-', sc.driver.device_type_to_string(d.type), '-', d.name, 'on', d.platform.name
# Every name must match both an argparse flag defined above (it is read with
# getattr(args, op)) and an attribute of sc.templates. The previous list still
# contained gemm_tn, gemm_nt and gemm_tt, for which no flag exists any more.
operations = ['elementwise_1d', 'reduce_1d', 'elementwise_2d', 'reduce_2d_rows', 'reduce_2d_cols', 'matrix_product_nn', 'matrix_product_tn', 'matrix_product_nt', 'matrix_product_tt']
operations = [getattr(sc.templates,op) for op in operations if getattr(args, op)]
return (device, operations, args.json)
@@ -63,16 +63,16 @@ class ProgressBar:
sys.stdout.flush()
if __name__ == "__main__":
logger = logging.getLogger(__name__)
logelementwise_2d = logging.getLogger(__name__)
sh = logging.StreamHandler(sys.stdout)
sh.setFormatter(logging.Formatter('%(message)s'))
sh.setLevel(logging.INFO)
logger.addHandler(sh)
logger.setLevel(logging.INFO)
logelementwise_2d.addHandler(sh)
logelementwise_2d.setLevel(logging.INFO)
sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE
device, operations, json = parse_arguments()
for operation in operations:
tuner = Tuner(logger, device, operation, json, ProgressBar(30, metric_name_of(operation)))
tuner = Tuner(logelementwise_2d, device, operation, json, ProgressBar(30, metric_name_of(operation)))
tuner.run(level='intermediate')