Feature: Merged kernel-fusion branch
* Fuses multiple AXPY kernels
* Makes it possible to add thread-wise for-loops in AXPY-like kernels
This commit is contained in:
@@ -73,7 +73,7 @@ def main():
|
||||
libraries += ['gnustl_shared']
|
||||
|
||||
#Source files
|
||||
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/wrap/clBLAS.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
src = 'src/lib/value_scalar.cpp src/lib/wrap/clBLAS.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/profiles/profiles.cpp src/lib/profiles/presets.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/handle.cpp src/lib/driver/dispatch.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/program_cache.cpp src/lib/driver/command_queue.cpp src/lib/driver/buffer.cpp src/lib/driver/backend.cpp src/lib/driver/platform.cpp src/lib/driver/program.cpp src/lib/driver/kernel.cpp src/lib/driver/device.cpp src/lib/driver/check.cpp src/lib/driver/context.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/io.cpp src/lib/array.cpp src/lib/kernels/parse.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/binder.cpp src/lib/kernels/keywords.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
boostsrc = 'external/boost/libs/'
|
||||
for s in ['numpy','python','smart_ptr','system','thread']:
|
||||
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
||||
|
@@ -276,11 +276,11 @@ void export_core()
|
||||
.def(bp::self OP bp::self)\
|
||||
ADD_SCALAR_HANDLING(OP)
|
||||
|
||||
bp::class_<sc::expressions_tuple>
|
||||
("array_expression_container", bp::init<sc::array_expression const &>())
|
||||
bp::class_<sc::math_expression>
|
||||
("math_expression_container", bp::init<sc::math_expression const &>())
|
||||
;
|
||||
|
||||
bp::class_<sc::array_expression >("array_expression", bp::no_init)
|
||||
bp::class_<sc::math_expression >("math_expression", bp::no_init)
|
||||
ADD_ARRAY_OPERATOR(+)
|
||||
ADD_ARRAY_OPERATOR(-)
|
||||
ADD_ARRAY_OPERATOR(*)
|
||||
@@ -291,7 +291,7 @@ void export_core()
|
||||
ADD_ARRAY_OPERATOR(<=)
|
||||
ADD_ARRAY_OPERATOR(==)
|
||||
ADD_ARRAY_OPERATOR(!=)
|
||||
.add_property("context", bp::make_function(&sc::array_expression::context, bp::return_internal_reference<>()))
|
||||
.add_property("context", bp::make_function(&sc::math_expression::context, bp::return_internal_reference<>()))
|
||||
.def(bp::self_ns::abs(bp::self))
|
||||
// .def(bp::self_ns::pow(bp::self))
|
||||
;
|
||||
@@ -299,15 +299,15 @@ void export_core()
|
||||
|
||||
#define ADD_ARRAY_OPERATOR(OP) \
|
||||
.def(bp::self OP bp::self)\
|
||||
.def(bp::self OP bp::other<sc::array_expression>())\
|
||||
.def(bp::other<sc::array_expression>() OP bp::self) \
|
||||
.def(bp::self OP bp::other<sc::math_expression>())\
|
||||
.def(bp::other<sc::math_expression>() OP bp::self) \
|
||||
ADD_SCALAR_HANDLING(OP)
|
||||
|
||||
bp::class_<sc::array,
|
||||
std::shared_ptr<sc::array> >
|
||||
( "array", bp::no_init)
|
||||
.def("__init__", bp::make_constructor(detail::create_array, bp::default_call_policies(), (bp::arg("obj"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")= bp::object())))
|
||||
.def(bp::init<sc::array_expression>())
|
||||
.def(bp::init<sc::math_expression>())
|
||||
.add_property("dtype", &sc::array::dtype)
|
||||
.add_property("context", bp::make_function(&sc::array::context, bp::return_internal_reference<>()))
|
||||
.add_property("T", &sc::array::T)
|
||||
@@ -336,15 +336,15 @@ void export_core()
|
||||
bp::def("empty", &detail::create_empty_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=bp::object()));
|
||||
|
||||
//Assign
|
||||
bp::def("assign", static_cast<sc::array_expression (*)(sc::array const &, sc::array const &)>(&sc::assign));\
|
||||
bp::def("assign", static_cast<sc::array_expression (*)(sc::array const &, sc::array_expression const &)>(&sc::assign));\
|
||||
bp::def("assign", static_cast<sc::math_expression (*)(sc::array const &, sc::array const &)>(&sc::assign));\
|
||||
bp::def("assign", static_cast<sc::math_expression (*)(sc::array const &, sc::math_expression const &)>(&sc::assign));\
|
||||
|
||||
//Binary
|
||||
#define MAP_FUNCTION(name) \
|
||||
bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &, sc::array const &)>(&sc::name));\
|
||||
bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &, sc::array const &)>(&sc::name));\
|
||||
bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &, sc::array_expression const &)>(&sc::name));\
|
||||
bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &, sc::array_expression const &)>(&sc::name));
|
||||
bp::def(#name, static_cast<sc::math_expression (*)(sc::array const &, sc::array const &)>(&sc::name));\
|
||||
bp::def(#name, static_cast<sc::math_expression (*)(sc::math_expression const &, sc::array const &)>(&sc::name));\
|
||||
bp::def(#name, static_cast<sc::math_expression (*)(sc::array const &, sc::math_expression const &)>(&sc::name));\
|
||||
bp::def(#name, static_cast<sc::math_expression (*)(sc::math_expression const &, sc::math_expression const &)>(&sc::name));
|
||||
|
||||
MAP_FUNCTION(maximum)
|
||||
MAP_FUNCTION(minimum)
|
||||
@@ -354,8 +354,8 @@ void export_core()
|
||||
|
||||
//Unary
|
||||
#define MAP_FUNCTION(name) \
|
||||
bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &)>(&sc::name));\
|
||||
bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &)>(&sc::name));
|
||||
bp::def(#name, static_cast<sc::math_expression (*)(sc::array const &)>(&sc::name));\
|
||||
bp::def(#name, static_cast<sc::math_expression (*)(sc::math_expression const &)>(&sc::name));
|
||||
|
||||
bp::def("zeros", &detail::create_zeros_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=bp::object()));
|
||||
|
||||
@@ -380,8 +380,8 @@ void export_core()
|
||||
/*--- Reduction operators----*/
|
||||
//---------------------------------------
|
||||
#define MAP_FUNCTION(name) \
|
||||
bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &, sc::int_t)>(&sc::name));\
|
||||
bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &, sc::int_t)>(&sc::name));
|
||||
bp::def(#name, static_cast<sc::math_expression (*)(sc::array const &, sc::int_t)>(&sc::name));\
|
||||
bp::def(#name, static_cast<sc::math_expression (*)(sc::math_expression const &, sc::int_t)>(&sc::name));
|
||||
|
||||
MAP_FUNCTION(sum)
|
||||
MAP_FUNCTION(max)
|
||||
|
@@ -62,7 +62,7 @@ namespace detail
|
||||
std::shared_ptr<sc::driver::Context> make_context(sc::driver::Device const & dev)
|
||||
{ return std::shared_ptr<sc::driver::Context>(new sc::driver::Context(dev)); }
|
||||
|
||||
bp::object enqueue(sc::array_expression const & expression, unsigned int queue_id, bp::list dependencies, bool tune, int label, std::string const & program_name, bool force_recompile)
|
||||
bp::object enqueue(sc::math_expression const & expression, unsigned int queue_id, bp::list dependencies, bool tune, int label, std::string const & program_name, bool force_recompile)
|
||||
{
|
||||
std::list<sc::driver::Event> events;
|
||||
std::vector<sc::driver::Event> cdependencies = tools::to_vector<sc::driver::Event>(dependencies);
|
||||
@@ -70,15 +70,15 @@ namespace detail
|
||||
sc::execution_options_type execution_options(queue_id, &events, &cdependencies);
|
||||
sc::dispatcher_options_type dispatcher_options(tune, label);
|
||||
sc::compilation_options_type compilation_options(program_name, force_recompile);
|
||||
sc::array_expression::container_type::value_type root = expression.tree()[expression.root()];
|
||||
sc::math_expression::container_type::value_type root = expression.tree()[expression.root()];
|
||||
if(sc::detail::is_assignment(root.op))
|
||||
{
|
||||
sc::execute(sc::control(expression, execution_options, dispatcher_options, compilation_options), isaac::profiles::get(execution_options.queue(expression.context())));
|
||||
sc::execute(sc::execution_handler(expression, execution_options, dispatcher_options, compilation_options), isaac::profiles::get(execution_options.queue(expression.context())));
|
||||
return bp::make_tuple(bp::ptr(root.lhs.array), tools::to_list(events.begin(), events.end()));
|
||||
}
|
||||
else
|
||||
{
|
||||
std::shared_ptr<sc::array> parray(new sc::array(sc::control(expression, execution_options, dispatcher_options, compilation_options)));
|
||||
std::shared_ptr<sc::array> parray(new sc::array(sc::execution_handler(expression, execution_options, dispatcher_options, compilation_options)));
|
||||
return bp::make_tuple(parray, tools::to_list(events.begin(), events.end()));
|
||||
}
|
||||
}
|
||||
|
@@ -13,7 +13,7 @@ namespace tpt = isaac::templates;
|
||||
|
||||
namespace detail
|
||||
{
|
||||
bp::list input_sizes(tpt::base & temp, sc::expressions_tuple const & tree)
|
||||
bp::list input_sizes(tpt::base & temp, sc::math_expression const & tree)
|
||||
{
|
||||
std::vector<isaac::int_t> tmp = temp.input_sizes(tree);
|
||||
return tools::to_list(tmp.begin(), tmp.end());
|
||||
|
Reference in New Issue
Block a user