diff --git a/include/isaac/kernels/templates/base.h b/include/isaac/kernels/templates/base.h index 223b261b6..7416d8930 100644 --- a/include/isaac/kernels/templates/base.h +++ b/include/isaac/kernels/templates/base.h @@ -57,86 +57,10 @@ public: unsigned int local_size_1; unsigned int num_kernels; }; - - class invalid_exception : public std::exception - { - public: - invalid_exception(); - invalid_exception(std::string message); - virtual const char* what() const throw(); - virtual ~invalid_exception() throw(); - private: - std::string message_; - }; - protected: - - /** @brief Functor to map the expressions to the types defined in mapped_objects.hpp */ - class map_functor : public traversal_functor - { - /** @brief Accessor for the numeric type */ - numeric_type get_numeric_type(isaac::array_expression const * array_expression, int_t root_idx) const; - /** @brief Creates a binary leaf */ - template std::shared_ptr binary_leaf(isaac::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const; - /** @brief Creates a value scalar mapping */ - std::shared_ptr create(numeric_type dtype, values_holder) const; - /** @brief Creates a vector mapping */ - std::shared_ptr create(array const *) const; - /** @brief Creates a tuple mapping */ - std::shared_ptr create(repeat_infos const &) const; - /** @brief Creates a mapping */ - std::shared_ptr create(lhs_rhs_element const &) const; - public: - map_functor(symbolic_binder & binder, mapping_type & mapping, const driver::Device &device); - /** @brief Functor for traversing the tree */ - void operator()(isaac::array_expression const & array_expression, int_t root_idx, leaf_t leaf_t) const; - private: - symbolic_binder & binder_; - mapping_type & mapping_; - driver::Device const & device_; - }; - - /** @brief functor for setting the arguments of a kernel */ - class set_arguments_functor : public traversal_functor - { - public: - typedef void result_type; - - set_arguments_functor(symbolic_binder 
& binder, unsigned int & current_arg, driver::Kernel & kernel); - void set_arguments(numeric_type dtype, values_holder const & scal) const; - void set_arguments(array const * ) const; - void set_arguments(repeat_infos const & i) const; - void set_arguments(lhs_rhs_element const & lhs_rhs) const; - - void operator()(isaac::array_expression const & array_expression, int_t root_idx, leaf_t leaf_t) const; - private: - symbolic_binder & binder_; - unsigned int & current_arg_; - driver::Kernel & kernel_; - }; - - static void compute_dot(kernel_generation_stream & os, std::string acc, std::string cur, op_element const & op); - static void compute_index_dot(kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, op_element const & op); - static void process_all(std::string const & type_key, std::string const & str, kernel_generation_stream & stream, std::vector const & mappings); - static void process_all_at(std::string const & type_key, std::string const & str, kernel_generation_stream & stream, std::vector const & mappings, size_t root_idx, leaf_t leaf); - static std::string neutral_element(op_element const & op, driver::backend_type backend, std::string const & datatype); - static std::string generate_arguments(std::vector const & mappings, std::map const & accessors, expressions_tuple const & expressions); - static std::string generate_arguments(std::string const & data_type, driver::Device const & device, std::vector const & mappings, expressions_tuple const & expressions); - static bool is_node_trans(array_expression::container_type const & array, size_t root_idx, leaf_t leaf_type); - static std::string append_simd_suffix(std::string const & str, unsigned int i); - static bool is_strided(array_expression::node const & node); static int_t vector_size(array_expression::node const & node); static std::pair matrix_size(array_expression::node const & node); - static bool is_dot(array_expression::node 
const & node); - static bool is_index_dot(op_element const & op); - static std::string access_vector_type(std::string const & v, int i); - - std::shared_ptr make_binder(); - static std::string vstore(unsigned int simd_width, std::string const & dtype, std::string const & value, std::string const & offset, std::string const & ptr, driver::backend_type backend); - static std::string vload(unsigned int simd_width, std::string const & dtype, std::string const & offset, std::string const & ptr, driver::backend_type backend); - static std::string append_width(std::string const & str, unsigned int width); static bool requires_fallback(expressions_tuple const & expressions); - void set_arguments(expressions_tuple const & expressions, driver::Kernel & kernel, unsigned int & current_arg); private: virtual std::string generate_impl(std::string const & suffix, expressions_tuple const & expressions, driver::Device const & device, std::vector const & mapping) const = 0; public: diff --git a/lib/kernels/templates/axpy.cpp b/lib/kernels/templates/axpy.cpp index ca310e109..26360fee1 100644 --- a/lib/kernels/templates/axpy.cpp +++ b/lib/kernels/templates/axpy.cpp @@ -7,6 +7,8 @@ #include "isaac/driver/backend.h" #include "tools/loop.hpp" +#include "tools/vector_types.hpp" +#include "tools/arguments.hpp" #include "to_string.hpp" @@ -133,7 +135,7 @@ void axpy::enqueue(driver::CommandQueue & queue, driver::Program const & program //Arguments unsigned int current_arg = 0; kernel.setSizeArg(current_arg++, size); - set_arguments(expressions, kernel, current_arg); + set_arguments(expressions, kernel, current_arg, binding_policy_); controller.execution_options().enqueue(program.context(), kernel, global, local); } diff --git a/lib/kernels/templates/base.cpp b/lib/kernels/templates/base.cpp index 0e59547d3..c6f426cb6 100644 --- a/lib/kernels/templates/base.cpp +++ b/lib/kernels/templates/base.cpp @@ -14,6 +14,8 @@ #include "isaac/exception/unknown_datatype.h" #include "isaac/symbolic/io.h" 
+#include "tools/map.hpp" + #include "to_string.hpp" namespace isaac @@ -24,323 +26,6 @@ namespace templates base::parameters_type::parameters_type(unsigned int _simd_width, int_t _local_size_1, int_t _local_size_2, int_t _num_kernels) : simd_width(_simd_width), local_size_0(_local_size_1), local_size_1(_local_size_2), num_kernels(_num_kernels) { } -numeric_type base::map_functor::get_numeric_type(isaac::array_expression const * array_expression, int_t root_idx) const -{ - array_expression::node const * root_node = &array_expression->tree()[root_idx]; - while (root_node->lhs.dtype==INVALID_NUMERIC_TYPE) - root_node = &array_expression->tree()[root_node->lhs.node_index]; - return root_node->lhs.dtype; -} - -/** @brief Binary leaf */ -template -std::shared_ptr base::map_functor::binary_leaf(isaac::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const -{ - return std::shared_ptr(new T(to_string(array_expression->dtype()), binder_.get(), mapped_object::node_info(mapping, array_expression, root_idx))); -} - -/** @brief Scalar mapping */ -std::shared_ptr base::map_functor::create(numeric_type dtype, values_holder) const -{ - std::string strdtype = to_string(dtype); - return std::shared_ptr(new mapped_host_scalar(strdtype, binder_.get())); -} - -/** @brief Vector mapping */ -std::shared_ptr base::map_functor::create(array const * a) const -{ - std::string dtype = to_string(a->dtype()); - unsigned int id = binder_.get(a->data()); - //Scalar - if(a->shape()[0]==1 && a->shape()[1]==1) - return std::shared_ptr(new mapped_array(dtype, id, 's')); - //Column vector - else if(a->shape()[0]>1 && a->shape()[1]==1) - return std::shared_ptr(new mapped_array(dtype, id, 'c')); - //Row vector - else if(a->shape()[0]==1 && a->shape()[1]>1) - return std::shared_ptr(new mapped_array(dtype, id, 'r')); - //Matrix - else - return std::shared_ptr(new mapped_array(dtype, id, 'm')); -} - -std::shared_ptr base::map_functor::create(repeat_infos const &) 
const -{ - //TODO: Make it less specific! - return std::shared_ptr(new mapped_tuple(size_type(device_),binder_.get(),4)); -} - -std::shared_ptr base::map_functor::create(lhs_rhs_element const & lhs_rhs) const -{ - switch(lhs_rhs.type_family) - { - case INFOS_TYPE_FAMILY: return create(lhs_rhs.tuple); - case VALUE_TYPE_FAMILY: return create(lhs_rhs.dtype, lhs_rhs.vscalar); - case ARRAY_TYPE_FAMILY: return create(lhs_rhs.array); - default: throw ""; - } -} - - -base::map_functor::map_functor(symbolic_binder & binder, mapping_type & mapping, driver::Device const & device) : binder_(binder), mapping_(mapping), device_(device){ } - -/** @brief Traversal functor */ -void base::map_functor::operator()(isaac::array_expression const & array_expression, int_t root_idx, leaf_t leaf_t) const -{ - mapping_type::key_type key(root_idx, leaf_t); - array_expression::node const & root_node = array_expression.tree()[root_idx]; - - if (leaf_t == LHS_NODE_TYPE && root_node.lhs.type_family != COMPOSITE_OPERATOR_FAMILY) - mapping_.insert(mapping_type::value_type(key, create(root_node.lhs))); - else if (leaf_t == RHS_NODE_TYPE && root_node.rhs.type_family != COMPOSITE_OPERATOR_FAMILY) - mapping_.insert(mapping_type::value_type(key, create(root_node.rhs))); - else if ( leaf_t== PARENT_NODE_TYPE) - { - if (root_node.op.type==OPERATOR_VDIAG_TYPE) - mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_))); - else if (root_node.op.type==OPERATOR_MATRIX_DIAG_TYPE) - mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_))); - else if (root_node.op.type==OPERATOR_MATRIX_ROW_TYPE) - mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_))); - else if (root_node.op.type==OPERATOR_MATRIX_COLUMN_TYPE) - mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_))); - else if (detail::is_scalar_dot(root_node)) - 
mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_))); - else if (detail::is_vector_dot(root_node)) - mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_))); - else if (root_node.op.type_family == OPERATOR_GEMM_TYPE_FAMILY) - mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_))); - else if (root_node.op.type == OPERATOR_REPEAT_TYPE) - mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_))); - else if (root_node.op.type == OPERATOR_OUTER_PROD_TYPE) - mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_))); - else if (detail::is_cast(root_node.op)) - mapping_.insert(mapping_type::value_type(key, std::shared_ptr(new mapped_cast(root_node.op.type, binder_.get())))); - } -} - -base::set_arguments_functor::set_arguments_functor(symbolic_binder & binder, unsigned int & current_arg, driver::Kernel & kernel) : - binder_(binder), current_arg_(current_arg), kernel_(kernel){ } - -void base::set_arguments_functor::set_arguments(numeric_type dtype, values_holder const & scal) const -{ - switch(dtype) - { -// case BOOL_TYPE: kernel_.setArg(current_arg_++, scal.bool8); break; - case CHAR_TYPE: kernel_.setArg(current_arg_++, scal.int8); break; - case UCHAR_TYPE: kernel_.setArg(current_arg_++, scal.uint8); break; - case SHORT_TYPE: kernel_.setArg(current_arg_++, scal.int16); break; - case USHORT_TYPE: kernel_.setArg(current_arg_++, scal.uint16); break; - case INT_TYPE: kernel_.setArg(current_arg_++, scal.int32); break; - case UINT_TYPE: kernel_.setArg(current_arg_++, scal.uint32); break; - case LONG_TYPE: kernel_.setArg(current_arg_++, scal.int64); break; - case ULONG_TYPE: kernel_.setArg(current_arg_++, scal.uint64); break; -// case HALF_TYPE: kernel_.setArg(current_arg_++, scal.float16); break; - case FLOAT_TYPE: kernel_.setArg(current_arg_++, scal.float32); 
break; - case DOUBLE_TYPE: kernel_.setArg(current_arg_++, scal.float64); break; - default: throw unknown_datatype(dtype); - } -} - -/** @brief Vector mapping */ -void base::set_arguments_functor::set_arguments(array const * a) const -{ - bool is_bound = binder_.bind(a->data()); - if (is_bound) - { - kernel_.setArg(current_arg_++, a->data()); - //scalar - if(a->shape()[0]==1 && a->shape()[1]==1) - { - kernel_.setSizeArg(current_arg_++, a->start()[0]); - } - //array - else if(a->shape()[0]==1 || a->shape()[1]==1) - { - kernel_.setSizeArg(current_arg_++, std::max(a->start()[0], a->start()[1])); - kernel_.setSizeArg(current_arg_++, std::max(a->stride()[0], a->stride()[1])); - } - else - { - kernel_.setSizeArg(current_arg_++, a->ld()); - kernel_.setSizeArg(current_arg_++, a->start()[0]); - kernel_.setSizeArg(current_arg_++, a->start()[1]); - kernel_.setSizeArg(current_arg_++, a->stride()[0]); - kernel_.setSizeArg(current_arg_++, a->stride()[1]); - } - } -} - -void base::set_arguments_functor::set_arguments(repeat_infos const & i) const -{ - kernel_.setSizeArg(current_arg_++, i.sub1); - kernel_.setSizeArg(current_arg_++, i.sub2); - kernel_.setSizeArg(current_arg_++, i.rep1); - kernel_.setSizeArg(current_arg_++, i.rep2); -} - -void base::set_arguments_functor::set_arguments(lhs_rhs_element const & lhs_rhs) const -{ - switch(lhs_rhs.type_family) - { - case VALUE_TYPE_FAMILY: return set_arguments(lhs_rhs.dtype, lhs_rhs.vscalar); - case ARRAY_TYPE_FAMILY: return set_arguments(lhs_rhs.array); - case INFOS_TYPE_FAMILY: return set_arguments(lhs_rhs.tuple); - default: throw invalid_exception("Unrecognized type family"); - } -} - -/** @brief Traversal functor: */ -void base::set_arguments_functor::operator()(isaac::array_expression const & array_expression, int_t root_idx, leaf_t leaf_t) const -{ - array_expression::node const & root_node = array_expression.tree()[root_idx]; - if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != COMPOSITE_OPERATOR_FAMILY) - 
set_arguments(root_node.lhs); - else if (leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != COMPOSITE_OPERATOR_FAMILY) - set_arguments(root_node.rhs); -} - -void base::compute_dot(kernel_generation_stream & os, std::string acc, std::string cur, op_element const & op) -{ - if (detail::is_elementwise_function(op)) - os << acc << "=" << evaluate(op.type) << "(" << acc << "," << cur << ");" << std::endl; - else - os << acc << "= (" << acc << ")" << evaluate(op.type) << "(" << cur << ");" << std::endl; -} - -void base::compute_index_dot(kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, op_element const & op) -{ - // os << acc << " = " << cur_value << ">" << acc_value << "?" << cur << ":" << acc << ";" << std::endl; - os << acc << "= select(" << acc << "," << cur << "," << cur_value << ">" << acc_value << ");" << std::endl; - os << acc_value << "="; - if (op.type==OPERATOR_ELEMENT_ARGFMAX_TYPE) os << "fmax"; - if (op.type==OPERATOR_ELEMENT_ARGMAX_TYPE) os << "max"; - if (op.type==OPERATOR_ELEMENT_ARGFMIN_TYPE) os << "fmin"; - if (op.type==OPERATOR_ELEMENT_ARGMIN_TYPE) os << "min"; - os << "(" << acc_value << "," << cur_value << ");"<< std::endl; -} - -void base::process_all(std::string const & type_key, std::string const & str, - kernel_generation_stream & stream, std::vector const & mappings) -{ - for (const auto & mapping : mappings) - for (mapping_type::const_iterator mmit = mapping.begin(); mmit != mapping.end(); ++mmit) - if (mmit->second->type_key()==type_key) - stream << mmit->second->process(str) << std::endl; -} - - -void base::base::process_all_at(std::string const & type_key, std::string const & str, - kernel_generation_stream & stream, std::vector const & mappings, - size_t root_idx, leaf_t leaf) -{ - for (const auto & mapping : mappings) - { - mapped_object * obj = mapping.at(mapping_key(root_idx, leaf)).get(); - if (obj->type_key()==type_key) - stream << obj->process(str) << 
std::endl; - } -} - -std::string base::neutral_element(op_element const & op, driver::backend_type backend, std::string const & dtype) -{ - std::string INF = Infinity(backend, dtype).get(); - std::string N_INF = "-" + INF; - - switch (op.type) - { - case OPERATOR_ADD_TYPE : return "0"; - case OPERATOR_MULT_TYPE : return "1"; - case OPERATOR_DIV_TYPE : return "1"; - case OPERATOR_ELEMENT_FMAX_TYPE : return N_INF; - case OPERATOR_ELEMENT_ARGFMAX_TYPE : return N_INF; - case OPERATOR_ELEMENT_MAX_TYPE : return N_INF; - case OPERATOR_ELEMENT_ARGMAX_TYPE : return N_INF; - case OPERATOR_ELEMENT_FMIN_TYPE : return INF; - case OPERATOR_ELEMENT_ARGFMIN_TYPE : return INF; - case OPERATOR_ELEMENT_MIN_TYPE : return INF; - case OPERATOR_ELEMENT_ARGMIN_TYPE : return INF; - - default: throw operation_not_supported_exception("Unsupported dot operator : no neutral element known"); - } -} - -std::string base::generate_arguments(std::vector const & mappings, std::map const & accessors, expressions_tuple const & expressions) -{ - kernel_generation_stream stream; - process(stream, PARENT_NODE_TYPE, accessors, expressions, mappings); - std::string res = stream.str(); - res.erase(res.rfind(',')); - return res; -} - -std::string base::generate_arguments(std::string const & data_type, driver::Device const & device, std::vector const & mappings, expressions_tuple const & expressions) -{ - std::string kwglobal = Global(device.backend()).get(); - std::string _size_t = size_type(device); - return generate_arguments(mappings, { {"array0", kwglobal + " #scalartype* #pointer, " + _size_t + " #start,"}, - {"host_scalar", "#scalartype #name,"}, - {"array1", kwglobal + " " + data_type + "* #pointer, " + _size_t + " #start, " + _size_t + " #stride,"}, - {"array2", kwglobal + " " + data_type + "* #pointer, " + _size_t + " #ld, " + _size_t + " #start1, " + _size_t + " #start2, " + _size_t + " #stride1, " + _size_t + " #stride2,"}, - {"tuple4", "#scalartype #name0, #scalartype #name1, #scalartype #name2, 
#scalartype #name3,"}} - , expressions); -} - - - -void base::set_arguments(expressions_tuple const & expressions, driver::Kernel & kernel, unsigned int & current_arg) -{ - std::shared_ptr binder = make_binder(); - for (const auto & elem : expressions.data()) - traverse(*elem, (elem)->root(), set_arguments_functor(*binder, current_arg, kernel), true); -} - -base::invalid_exception::invalid_exception() : message_() {} - -base::invalid_exception::invalid_exception(std::string message) : - message_("ISAAC: Internal error: The generator cannot apply the given template to the given array_expression: " + message + "\n" - "If you are using a builtin template, please report on viennacl-support@lists.sourceforge.net! We will provide a fix as soon as possible\n" - "If you are using your own template, please try using other parameters") {} - -const char* base::invalid_exception::what() const throw() { return message_.c_str(); } - -base::invalid_exception::~invalid_exception() throw() {} - -bool base::is_node_trans(array_expression::container_type const & array, size_t root_idx, leaf_t leaf_type) -{ - bool res = false; - lhs_rhs_element array_expression::node::*ptr; - if (leaf_type==LHS_NODE_TYPE) - ptr = &array_expression::node::lhs; - else - ptr = &array_expression::node::rhs; - array_expression::node const * node = &array[root_idx]; - while ((node->*ptr).type_family==COMPOSITE_OPERATOR_FAMILY) - { - if (array[(node->*ptr).node_index].op.type==OPERATOR_TRANS_TYPE) - res = !res; - node = &array[(node->*ptr).node_index]; - } - return res; -} - -std::string base::append_simd_suffix(std::string const & str, unsigned int i) -{ - assert(i < 16); - char suffixes[] = {'0','1','2','3','4','5','6','7','8','9', - 'a','b','c','d','e','f'}; - return str + tools::to_string(suffixes[i]); -} - -bool base::is_strided(array_expression::node const & node) -{ - return node.op.type==OPERATOR_VDIAG_TYPE - || node.op.type==OPERATOR_MATRIX_DIAG_TYPE - || node.op.type==OPERATOR_MATRIX_ROW_TYPE - || 
node.op.type==OPERATOR_MATRIX_COLUMN_TYPE - || node.op.type==OPERATOR_OUTER_PROD_TYPE; -} bool base::requires_fallback(expressions_tuple const & expressions) { @@ -379,96 +64,6 @@ std::pair base::matrix_size(array_expression::node const & node) return std::make_pair(node.lhs.array->shape()[0],node.lhs.array->shape()[1]); } -bool base::is_dot(array_expression::node const & node) -{ - return node.op.type_family==OPERATOR_VECTOR_DOT_TYPE_FAMILY - || node.op.type_family==OPERATOR_COLUMNS_DOT_TYPE_FAMILY - || node.op.type_family==OPERATOR_ROWS_DOT_TYPE_FAMILY; -} - -bool base::is_index_dot(op_element const & op) -{ - return op.type==OPERATOR_ELEMENT_ARGFMAX_TYPE - || op.type==OPERATOR_ELEMENT_ARGMAX_TYPE - || op.type==OPERATOR_ELEMENT_ARGFMIN_TYPE - || op.type==OPERATOR_ELEMENT_ARGMIN_TYPE; -} - -std::string base::access_vector_type(std::string const & v, int i) -{ - switch(i) - { - case 0: return v + ".x"; - case 1: return v + ".y"; - case 2: return v + ".z"; - case 3: return v + ".w"; - default: throw; - } -} - -std::string base::vstore(unsigned int simd_width, std::string const & - #ifdef ISAAC_WITH_CUDA - dtype - #endif - , std::string const & value, std::string const & offset, std::string const & ptr, driver::backend_type backend) -{ - if (simd_width==1) - return "(" + ptr + ")[" + offset + "] = " + value; - else - { - switch(backend) - { -#ifdef ISAAC_WITH_CUDA - case driver::CUDA: - return "reinterpret_cast<" + append_width(dtype,simd_width) + "*>(" + ptr + ")[" + offset + "] = " + value; -#endif - case driver::OPENCL: - return append_width("vstore", simd_width) + "(" + value + ", " + offset + ", " + ptr + ")"; - default: - throw; - } - } -} - -std::string base::vload(unsigned int simd_width, std::string const & - #ifdef ISAAC_WITH_CUDA - dtype - #endif - , std::string const & offset, std::string const & ptr, driver::backend_type backend) -{ - if (simd_width==1) - return "(" + ptr + ")[" + offset + "]"; - else - { - switch(backend) - { -#ifdef ISAAC_WITH_CUDA - 
case driver::CUDA: - return "reinterpret_cast<" + append_width(dtype, simd_width) + "*>(" + ptr + ")[" + offset + "]"; -#endif - case driver::OPENCL: - return append_width("vload", simd_width) + "(" + offset + ", " + ptr + ")"; - default: - throw; - } - } -} - -std::string base::append_width(std::string const & str, unsigned int width) -{ - if (width==1) - return str; - return str + tools::to_string(width); -} - -std::shared_ptr base::make_binder() -{ - if (binding_policy_==BIND_TO_HANDLE) - return std::shared_ptr(new bind_to_handle()); - else - return std::shared_ptr(new bind_all_unique()); -} - base::base(binding_policy_t binding_policy) : binding_policy_(binding_policy) {} @@ -492,7 +87,12 @@ std::string base::generate(std::string const & suffix, expressions_tuple const & //Create mapping std::vector mappings(expressions.data().size()); - std::shared_ptr binder = make_binder(); + std::unique_ptr binder; + if (binding_policy_==BIND_TO_HANDLE) + binder.reset(new bind_to_handle()); + else + binder.reset(new bind_all_unique()); + for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++sit, ++mit) traverse(**sit, (*sit)->root(), map_functor(*binder,*mit,device), true); diff --git a/lib/kernels/templates/dot.cpp b/lib/kernels/templates/dot.cpp index 009a3c676..df6e414d7 100644 --- a/lib/kernels/templates/dot.cpp +++ b/lib/kernels/templates/dot.cpp @@ -4,6 +4,10 @@ #include "isaac/kernels/keywords.h" #include "tools/loop.hpp" +#include "tools/reductions.hpp" +#include "tools/vector_types.hpp" +#include "tools/arguments.hpp" + #include "to_string.hpp" @@ -341,7 +345,7 @@ void dot::enqueue(driver::CommandQueue & queue, driver::Program const & program, i++; } - set_arguments(expressions, kernel, n_arg); + set_arguments(expressions, kernel, n_arg, binding_policy_); } for (unsigned int k = 0; k < 2; k++) diff --git a/lib/kernels/templates/gemm.cpp b/lib/kernels/templates/gemm.cpp index 16de941d8..1cb387b1a 100644 --- 
a/lib/kernels/templates/gemm.cpp +++ b/lib/kernels/templates/gemm.cpp @@ -5,6 +5,9 @@ #include "isaac/symbolic/preset.h" #include "isaac/exception/operation_not_supported.h" +#include "tools/arguments.hpp" +#include "tools/vector_types.hpp" + #include "to_string.hpp" #include "align.hpp" diff --git a/lib/kernels/templates/gemv.cpp b/lib/kernels/templates/gemv.cpp index 00eca190c..5f20ed571 100644 --- a/lib/kernels/templates/gemv.cpp +++ b/lib/kernels/templates/gemv.cpp @@ -4,9 +4,12 @@ #include "isaac/kernels/keywords.h" #include "isaac/kernels/templates/gemv.h" -#include "to_string.hpp" - +#include "tools/arguments.hpp" #include "tools/loop.hpp" +#include "tools/reductions.hpp" +#include "tools/vector_types.hpp" + +#include "to_string.hpp" namespace isaac { @@ -399,7 +402,7 @@ void gemv::enqueue(driver::CommandQueue & queue, driver::Program const & program kernel.setArg(n_arg++, tmp[i]); i++; } - set_arguments(expressions, kernel, n_arg); + set_arguments(expressions, kernel, n_arg, binding_policy_); } //NDRange diff --git a/lib/kernels/templates/ger.cpp b/lib/kernels/templates/ger.cpp index 18b688906..9c2fd817e 100644 --- a/lib/kernels/templates/ger.cpp +++ b/lib/kernels/templates/ger.cpp @@ -4,7 +4,9 @@ #include "isaac/symbolic/io.h" #include "isaac/kernels/keywords.h" +#include "tools/arguments.hpp" #include "tools/loop.hpp" +#include "tools/vector_types.hpp" namespace isaac { @@ -124,7 +126,7 @@ void ger::enqueue(driver::CommandQueue & /*queue*/, driver::Program const & prog std::vector MN = input_sizes(expressions); kernel.setSizeArg(current_arg++, MN[0]); kernel.setSizeArg(current_arg++, MN[1]); - set_arguments(expressions, kernel, current_arg); + set_arguments(expressions, kernel, current_arg, binding_policy_); controller.execution_options().enqueue(program.context(), kernel, global, local); } diff --git a/lib/kernels/templates/tools/arguments.hpp b/lib/kernels/templates/tools/arguments.hpp new file mode 100644 index 000000000..58dcdbc20 --- /dev/null +++ 
b/lib/kernels/templates/tools/arguments.hpp @@ -0,0 +1,140 @@ +#include +#include +#include +#include "isaac/kernels/mapped_object.h" +#include "isaac/kernels/parse.h" +#include "isaac/array.h" + +namespace isaac +{ +namespace templates +{ + +//Generate +inline std::string generate_arguments(std::string const & data_type, driver::Device const & device, std::vector const & mappings, expressions_tuple const & expressions) +{ + std::string kwglobal = Global(device.backend()).get(); + std::string _size_t = size_type(device); + + kernel_generation_stream stream; + + process(stream, PARENT_NODE_TYPE, { {"array0", kwglobal + " #scalartype* #pointer, " + _size_t + " #start,"}, + {"host_scalar", "#scalartype #name,"}, + {"array1", kwglobal + " " + data_type + "* #pointer, " + _size_t + " #start, " + _size_t + " #stride,"}, + {"array2", kwglobal + " " + data_type + "* #pointer, " + _size_t + " #ld, " + _size_t + " #start1, " + _size_t + " #start2, " + _size_t + " #stride1, " + _size_t + " #stride2,"}, + {"tuple4", "#scalartype #name0, #scalartype #name1, #scalartype #name2, #scalartype #name3,"}} + , expressions, mappings); + + std::string res = stream.str(); + res.erase(res.rfind(',')); + return res; +} + +//Enqueue +class set_arguments_functor : public traversal_functor +{ +public: + typedef void result_type; + + set_arguments_functor(symbolic_binder & binder, unsigned int & current_arg, driver::Kernel & kernel) + : binder_(binder), current_arg_(current_arg), kernel_(kernel) + { + } + + void set_arguments(numeric_type dtype, values_holder const & scal) const + { + switch(dtype) + { + // case BOOL_TYPE: kernel_.setArg(current_arg_++, scal.bool8); break; + case CHAR_TYPE: kernel_.setArg(current_arg_++, scal.int8); break; + case UCHAR_TYPE: kernel_.setArg(current_arg_++, scal.uint8); break; + case SHORT_TYPE: kernel_.setArg(current_arg_++, scal.int16); break; + case USHORT_TYPE: kernel_.setArg(current_arg_++, scal.uint16); break; + case INT_TYPE: 
kernel_.setArg(current_arg_++, scal.int32); break; + case UINT_TYPE: kernel_.setArg(current_arg_++, scal.uint32); break; + case LONG_TYPE: kernel_.setArg(current_arg_++, scal.int64); break; + case ULONG_TYPE: kernel_.setArg(current_arg_++, scal.uint64); break; + // case HALF_TYPE: kernel_.setArg(current_arg_++, scal.float16); break; + case FLOAT_TYPE: kernel_.setArg(current_arg_++, scal.float32); break; + case DOUBLE_TYPE: kernel_.setArg(current_arg_++, scal.float64); break; + default: throw unknown_datatype(dtype); + } + } + + void set_arguments(array const * a) const + { + bool is_bound = binder_.bind(a->data()); + if (is_bound) + { + kernel_.setArg(current_arg_++, a->data()); + //scalar + if(a->shape()[0]==1 && a->shape()[1]==1) + { + kernel_.setSizeArg(current_arg_++, a->start()[0]); + } + //array + else if(a->shape()[0]==1 || a->shape()[1]==1) + { + kernel_.setSizeArg(current_arg_++, std::max(a->start()[0], a->start()[1])); + kernel_.setSizeArg(current_arg_++, std::max(a->stride()[0], a->stride()[1])); + } + else + { + kernel_.setSizeArg(current_arg_++, a->ld()); + kernel_.setSizeArg(current_arg_++, a->start()[0]); + kernel_.setSizeArg(current_arg_++, a->start()[1]); + kernel_.setSizeArg(current_arg_++, a->stride()[0]); + kernel_.setSizeArg(current_arg_++, a->stride()[1]); + } + } + } + + void set_arguments(repeat_infos const & i) const + { + kernel_.setSizeArg(current_arg_++, i.sub1); + kernel_.setSizeArg(current_arg_++, i.sub2); + kernel_.setSizeArg(current_arg_++, i.rep1); + kernel_.setSizeArg(current_arg_++, i.rep2); + } + + + void set_arguments(lhs_rhs_element const & lhs_rhs) const + { + switch(lhs_rhs.type_family) + { + case VALUE_TYPE_FAMILY: return set_arguments(lhs_rhs.dtype, lhs_rhs.vscalar); + case ARRAY_TYPE_FAMILY: return set_arguments(lhs_rhs.array); + case INFOS_TYPE_FAMILY: return set_arguments(lhs_rhs.tuple); + default: throw std::runtime_error("Unrecognized type family"); + } + } + + void operator()(isaac::array_expression const & 
array_expression, int_t root_idx, leaf_t leaf_t) const
+  {
+    array_expression::node const & root_node = array_expression.tree()[root_idx];
+    if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != COMPOSITE_OPERATOR_FAMILY)
+      set_arguments(root_node.lhs);
+    else if (leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != COMPOSITE_OPERATOR_FAMILY)
+      set_arguments(root_node.rhs);
+  }
+
+
+private:
+  symbolic_binder & binder_;
+  unsigned int & current_arg_;
+  driver::Kernel & kernel_;
+};
+
+/**
+ * @brief Sets the kernel arguments for every expression of a tuple.
+ *
+ * A binder is created according to @p binding_policy (bind_to_handle for
+ * BIND_TO_HANDLE, bind_all_unique for anything else), then each expression in
+ * expressions.data() is traversed from its root with a set_arguments_functor.
+ *
+ * @param expressions     expressions whose leaves are bound
+ * @param kernel          kernel handed to the functor
+ * @param current_arg     argument counter, passed by reference to the functor
+ *                        and shared across all expressions of the tuple
+ * @param binding_policy  selects the binder implementation
+ */
+inline void set_arguments(expressions_tuple const & expressions, driver::Kernel & kernel, unsigned int & current_arg, binding_policy_t binding_policy)
+{
+  std::unique_ptr binder;
+  if (binding_policy==BIND_TO_HANDLE)
+    binder.reset(new bind_to_handle());
+  else
+    binder.reset(new bind_all_unique());
+  for (const auto & elem : expressions.data())
+    traverse(*elem, (elem)->root(), set_arguments_functor(*binder, current_arg, kernel), true);
+}
+
+}
+}
diff --git a/lib/kernels/templates/tools/map.hpp b/lib/kernels/templates/tools/map.hpp
new file mode 100644
index 000000000..4fa3a7717
--- /dev/null
+++ b/lib/kernels/templates/tools/map.hpp
@@ -0,0 +1,121 @@
+// NOTE(review): in this copy of the patch every `<...>` span is missing (the
+// two bare #include lines below, template parameter/argument lists, smart
+// pointer element types). They are kept as found - restore them from the
+// upstream commit before applying.
+#include
+#include
+#include <stdexcept>
+#include "isaac/kernels/mapped_object.h"
+#include "isaac/kernels/parse.h"
+
+namespace isaac
+{
+
+namespace templates
+{
+
+/**
+ * @brief Traversal functor that fills a mapping_type with one mapped object
+ *        per visited node.
+ *
+ * Entries are keyed by (root_idx, leaf_t). Non-composite LHS/RHS leaves are
+ * mapped through create(); parent nodes are mapped only for the operators
+ * listed in operator().
+ */
+class map_functor : public traversal_functor
+{
+
+  /**
+   * @brief Returns the dtype of the first left-hand operand that has one.
+   *
+   * Starting at @p root_idx, follows lhs.node_index for as long as lhs.dtype
+   * is INVALID_NUMERIC_TYPE.
+   */
+  numeric_type get_numeric_type(isaac::array_expression const * array_expression, int_t root_idx) const
+  {
+    array_expression::node const * root_node = &array_expression->tree()[root_idx];
+    while (root_node->lhs.dtype==INVALID_NUMERIC_TYPE)
+      root_node = &array_expression->tree()[root_node->lhs.node_index];
+    return root_node->lhs.dtype;
+  }
+
+  /**
+   * @brief Builds a mapped object of type T for the node at @p root_idx.
+   *
+   * T is constructed from the expression's dtype name, a fresh id from the
+   * binder and a mapped_object::node_info describing the node.
+   */
+  template
+  std::shared_ptr binary_leaf(isaac::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const
+  {
+    return std::shared_ptr(new T(to_string(array_expression->dtype()), binder_.get(), mapped_object::node_info(mapping, array_expression, root_idx)));
+  }
+
+  /** @brief Maps a host scalar; only the dtype is used, the value is ignored here. */
+  std::shared_ptr create(numeric_type dtype, values_holder) const
+  {
+    std::string strdtype = to_string(dtype);
+    return std::shared_ptr(new mapped_host_scalar(strdtype, binder_.get()));
+  }
+
+  /**
+   * @brief Maps an array, tagging it by the shape of its first two dimensions.
+   *
+   * 's' for 1x1, 'c' for Nx1 with N>1, 'r' for 1xN with N>1, 'm' for
+   * everything else.
+   */
+  std::shared_ptr create(array const * a) const
+  {
+    std::string dtype = to_string(a->dtype());
+    unsigned int id = binder_.get(a->data());
+    char kind = 'm'; //Matrix
+    if(a->shape()[0]==1 && a->shape()[1]==1)
+      kind = 's'; //Scalar
+    else if(a->shape()[0]>1 && a->shape()[1]==1)
+      kind = 'c'; //Column vector
+    else if(a->shape()[0]==1 && a->shape()[1]>1)
+      kind = 'r'; //Row vector
+    return std::shared_ptr(new mapped_array(dtype, id, kind));
+  }
+
+  /** @brief Maps a repeat_infos operand to a mapped_tuple of hard-coded size 4. */
+  std::shared_ptr create(repeat_infos const &) const
+  {
+    //TODO: Make it less specific!
+    return std::shared_ptr(new mapped_tuple(size_type(device_),binder_.get(),4));
+  }
+
+  /**
+   * @brief Dispatches on the operand's type family to the matching overload.
+   *
+   * @throws std::runtime_error if the type family is not INFOS, VALUE or ARRAY.
+   */
+  std::shared_ptr create(lhs_rhs_element const & lhs_rhs) const
+  {
+    switch(lhs_rhs.type_family)
+    {
+      case INFOS_TYPE_FAMILY: return create(lhs_rhs.tuple);
+      case VALUE_TYPE_FAMILY: return create(lhs_rhs.dtype, lhs_rhs.vscalar);
+      case ARRAY_TYPE_FAMILY: return create(lhs_rhs.array);
+      // A std::exception subclass with a message, so generic handlers can
+      // catch and report it.
+      default: throw std::runtime_error("map_functor : cannot map an operand of this type family");
+    }
+  }
+
+
+public:
+  /** @brief Stores references to the binder, the mapping to fill and the device. */
+  map_functor(symbolic_binder & binder, mapping_type & mapping, const driver::Device &device)
+    : binder_(binder), mapping_(mapping), device_(device)
+  {
+  }
+
+  /**
+   * @brief Inserts the mapped object for one visited node into the mapping.
+   *
+   * LHS/RHS visits map the operand unless it is a composite operator. PARENT
+   * visits insert an entry only for the operators tested below; any other
+   * parent node is left unmapped.
+   */
+  void operator()(isaac::array_expression const & array_expression, int_t root_idx, leaf_t leaf_t) const
+  {
+    mapping_type::key_type key(root_idx, leaf_t);
+    array_expression::node const & root_node = array_expression.tree()[root_idx];
+
+    if (leaf_t == LHS_NODE_TYPE && root_node.lhs.type_family != COMPOSITE_OPERATOR_FAMILY)
+      mapping_.insert(mapping_type::value_type(key, create(root_node.lhs)));
+    else if (leaf_t == RHS_NODE_TYPE && root_node.rhs.type_family != COMPOSITE_OPERATOR_FAMILY)
+      mapping_.insert(mapping_type::value_type(key, create(root_node.rhs)));
+    else if ( leaf_t== PARENT_NODE_TYPE)
+    {
+      // NOTE(review): each binary_leaf call below presumably names a different
+      // mapped type as template argument; those arguments are missing in this
+      // copy - confirm against the upstream commit.
+      if (root_node.op.type==OPERATOR_VDIAG_TYPE)
+        mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_)));
+      else if (root_node.op.type==OPERATOR_MATRIX_DIAG_TYPE)
+        mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_)));
+      else if (root_node.op.type==OPERATOR_MATRIX_ROW_TYPE)
+        mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_)));
+      else if (root_node.op.type==OPERATOR_MATRIX_COLUMN_TYPE)
+        mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_)));
+      else if (detail::is_scalar_dot(root_node))
+        mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_)));
+      else if (detail::is_vector_dot(root_node))
+        mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_)));
+      else if (root_node.op.type_family == OPERATOR_GEMM_TYPE_FAMILY)
+        mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_)));
+      else if (root_node.op.type == OPERATOR_REPEAT_TYPE)
+        mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_)));
+      else if (root_node.op.type == OPERATOR_OUTER_PROD_TYPE)
+        mapping_.insert(mapping_type::value_type(key, binary_leaf(&array_expression, root_idx, &mapping_)));
+      else if (detail::is_cast(root_node.op))
+        mapping_.insert(mapping_type::value_type(key, std::shared_ptr(new mapped_cast(root_node.op.type, binder_.get()))));
+    }
+  }
+private:
+  symbolic_binder & binder_;
+  mapping_type & mapping_;
+  driver::Device const & device_;
+};
+
+
+}
+
+}
diff --git a/lib/kernels/templates/tools/reductions.hpp b/lib/kernels/templates/tools/reductions.hpp
new file mode 100644
index 000000000..757bcf611
--- /dev/null
+++ b/lib/kernels/templates/tools/reductions.hpp
@@ -0,0 +1,77 @@
+// NOTE(review): the two bare #include lines below lost their header names in
+// this copy of the patch; they are kept as found - restore before applying.
+#include
+#include
+
+#include "isaac/driver/common.h"
+#include "isaac/kernels/keywords.h"
+#include "isaac/kernels/stream.h"
+#include "isaac/symbolic/expression.h"
+#include "isaac/types.h"
+
+namespace isaac
+{
+namespace templates
+{
+
+/**
+ * @brief Writes the statement that folds @p cur into the accumulator @p acc.
+ *
+ * For element-wise functions the statement is `acc=f(acc,cur);`, otherwise it
+ * is `acc= (acc)OP(cur);`, where f / OP is evaluate(op.type).
+ */
+inline void compute_dot(kernel_generation_stream & os, std::string acc, std::string cur, op_element const & op)
+{
+  if (detail::is_elementwise_function(op))
+    os << acc << "=" << evaluate(op.type) << "(" << acc << "," << cur << ");" << std::endl;
+  else
+    os << acc << "= (" << acc << ")" << evaluate(op.type) << "(" << cur << ");" << std::endl;
+}
+
+/**
+ * @brief Writes the two statements of an arg-max / arg-min accumulation step.
+ *
+ * The first statement replaces the index @p acc by @p cur when the candidate
+ * value beats the current best, the second folds @p cur_value into
+ * @p acc_value.
+ *
+ * @param os         stream receiving the generated code
+ * @param acc        name of the accumulated index
+ * @param cur        name of the candidate index
+ * @param acc_value  name of the accumulated value
+ * @param cur_value  name of the candidate value
+ * @param op         one of the four OPERATOR_ELEMENT_ARG* operators
+ * @throws std::runtime_error for any other operator
+ */
+inline void compute_index_dot(kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, op_element const & op)
+{
+  std::string accumulate;
+  // The candidate wins when it is greater for arg-max and smaller for arg-min.
+  // neutral_element() seeds arg-min with +infinity, so a `>` test there would
+  // never select any index.
+  std::string beats;
+  switch (op.type)
+  {
+    case OPERATOR_ELEMENT_ARGFMAX_TYPE : accumulate = "fmax"; beats = ">"; break;
+    case OPERATOR_ELEMENT_ARGMAX_TYPE : accumulate = "max"; beats = ">"; break;
+    case OPERATOR_ELEMENT_ARGFMIN_TYPE : accumulate = "fmin"; beats = "<"; break;
+    case OPERATOR_ELEMENT_ARGMIN_TYPE : accumulate = "min"; beats = "<"; break;
+    // Fail before anything is written rather than emit a malformed statement.
+    default: throw std::runtime_error("Unsupported index dot operator");
+  }
+
+  os << acc << "= select(" << acc << "," << cur << "," << cur_value << beats << acc_value << ");" << std::endl;
+  os << acc_value << "=" << accumulate << "(" << acc_value << "," << cur_value << ");" << std::endl;
+}
+
+/**
+ * @brief Returns the source text of the value a reduction accumulator starts from.
+ *
+ * "0" for addition, "1" for multiplication and division, minus infinity for
+ * the max-like operators and plus infinity for the min-like ones. The
+ * infinity literal comes from Infinity(backend, dtype).
+ *
+ * @throws std::runtime_error if @p op has no known neutral element
+ */
+inline std::string neutral_element(op_element const & op, driver::backend_type backend, std::string const & dtype)
+{
+  std::string INF = Infinity(backend, dtype).get();
+  std::string N_INF = "-" + INF;
+
+  switch (op.type)
+  {
+    case OPERATOR_ADD_TYPE :
+      return "0";
+
+    case OPERATOR_MULT_TYPE :
+    case OPERATOR_DIV_TYPE :
+      return "1";
+
+    case OPERATOR_ELEMENT_FMAX_TYPE :
+    case OPERATOR_ELEMENT_ARGFMAX_TYPE :
+    case OPERATOR_ELEMENT_MAX_TYPE :
+    case OPERATOR_ELEMENT_ARGMAX_TYPE :
+      return N_INF;
+
+    case OPERATOR_ELEMENT_FMIN_TYPE :
+    case OPERATOR_ELEMENT_ARGFMIN_TYPE :
+    case OPERATOR_ELEMENT_MIN_TYPE :
+    case OPERATOR_ELEMENT_ARGMIN_TYPE :
+      return INF;
+
+    default: throw std::runtime_error("Unsupported dot operator : no neutral element known");
+  }
+}
+
+/** @brief True if the node's operator belongs to the vector, columns or rows dot family. */
+inline bool is_dot(array_expression::node const & node)
+{
+  return node.op.type_family==OPERATOR_VECTOR_DOT_TYPE_FAMILY
+      || node.op.type_family==OPERATOR_COLUMNS_DOT_TYPE_FAMILY
+      || node.op.type_family==OPERATOR_ROWS_DOT_TYPE_FAMILY;
+}
+
+
+/** @brief True if @p op is one of the four arg-max / arg-min operators. */
+inline bool is_index_dot(op_element const & op)
+{
+  return op.type==OPERATOR_ELEMENT_ARGFMAX_TYPE
+      || op.type==OPERATOR_ELEMENT_ARGMAX_TYPE
+      || op.type==OPERATOR_ELEMENT_ARGFMIN_TYPE
+      || op.type==OPERATOR_ELEMENT_ARGMIN_TYPE;
+}
+
+
+}
+
+}
diff --git a/lib/kernels/templates/tools/vector_types.hpp b/lib/kernels/templates/tools/vector_types.hpp
new file mode 100644
index 000000000..95256383a
--- /dev/null
+++ b/lib/kernels/templates/tools/vector_types.hpp
@@ -0,0 +1,85 @@
+// NOTE(review): the two bare #include lines below lost their header names in
+// this copy of the patch; they are kept as found - restore before applying.
+#include
+#include
+#include <stdexcept>
+
+#include "isaac/driver/common.h"
+
+#include "to_string.hpp"
+
+namespace isaac
+{
+namespace templates
+{
+
+/**
+ * @brief Appends the hexadecimal digit for component @p i to @p str.
+ *
+ * @pre i < 16. This is checked by assert only, so it is not enforced when
+ *      asserts are compiled out.
+ */
+inline std::string append_simd_suffix(std::string const & str, unsigned int i)
+{
+  assert(i < 16);
+  static const char suffixes[] = {'0','1','2','3','4','5','6','7','8','9',
+                                  'a','b','c','d','e','f'};
+  return str + tools::to_string(suffixes[i]);
+}
+
+
+/**
+ * @brief Returns @p v followed by the component selector for index @p i
+ *        (.x, .y, .z, .w for 0..3).
+ *
+ * @throws std::out_of_range if @p i is not in [0, 3]
+ */
+inline std::string access_vector_type(std::string const & v, int i)
+{
+  switch(i)
+  {
+    case 0: return v + ".x";
+    case 1: return v + ".y";
+    case 2: return v + ".z";
+    case 3: return v + ".w";
+    // A real exception: a bare `throw;` with nothing in flight terminates.
+    default: throw std::out_of_range("access_vector_type : component index must be in [0, 3]");
+  }
+}
+
+/** @brief Returns @p str unchanged for width 1, otherwise @p str followed by the width. */
+inline std::string append_width(std::string const & str, unsigned int width)
+{
+  if (width==1)
+    return str;
+  return str + tools::to_string(width);
+}
+
+
+/**
+ * @brief Returns the source text that stores @p value at @p offset of @p ptr.
+ *
+ * Width 1 is a plain indexed assignment on every backend. Wider stores use
+ * vstoreN(value, offset, ptr) on OpenCL and, when ISAAC_WITH_CUDA is defined,
+ * an indexed assignment through a pointer cast to the N-wide type on CUDA.
+ *
+ * @throws std::invalid_argument if simd_width > 1 and the backend is not handled
+ */
+inline std::string vstore(unsigned int simd_width, std::string const & dtype, std::string const & value, std::string const & offset, std::string const & ptr, driver::backend_type backend)
+{
+  if (simd_width==1)
+    return "(" + ptr + ")[" + offset + "] = " + value;
+
+  switch(backend)
+  {
+    #ifdef ISAAC_WITH_CUDA
+    case driver::CUDA:
+      return "reinterpret_cast<" + append_width(dtype,simd_width) + "*>(" + ptr + ")[" + offset + "] = " + value;
+    #endif
+    case driver::OPENCL:
+      return append_width("vstore", simd_width) + "(" + value + ", " + offset + ", " + ptr + ")";
+    default:
+      // A real exception: a bare `throw;` with nothing in flight terminates.
+      throw std::invalid_argument("vstore : unsupported backend");
+  }
+}
+
+/**
+ * @brief Returns the source text that loads the element at @p offset of @p ptr.
+ *
+ * Width 1 is a plain indexed read on every backend. Wider loads use
+ * vloadN(offset, ptr) on OpenCL and, when ISAAC_WITH_CUDA is defined, an
+ * indexed read through a pointer cast to the N-wide type on CUDA.
+ *
+ * @throws std::invalid_argument if simd_width > 1 and the backend is not handled
+ */
+inline std::string vload(unsigned int simd_width, std::string const & dtype, std::string const & offset, std::string const & ptr, driver::backend_type backend)
+{
+  if (simd_width==1)
+    return "(" + ptr + ")[" + offset + "]";
+
+  switch(backend)
+  {
+    #ifdef ISAAC_WITH_CUDA
+    case driver::CUDA:
+      return "reinterpret_cast<" + append_width(dtype,simd_width) + "*>(" + ptr + ")[" + offset + "]";
+    #endif
+    case driver::OPENCL:
+      return append_width("vload", simd_width) + "(" + offset + ", " + ptr + ")";
+    default:
+      // A real exception: a bare `throw;` with nothing in flight terminates.
+      throw std::invalid_argument("vload : unsupported backend");
+  }
+}
+
+}
+}
diff --git a/python/setup.py b/python/setup.py index 65aedc2b6..ad922f3fb 100644 --- a/python/setup.py +++ b/python/setup.py @@ -115,7 +115,7 @@ def main(): include =' src/include'.split() + ['external/boost/include', os.path.join(find_module("numpy")[1], "core", "include")] #Source files - src = 'src/lib/array.cpp src/lib/wrap/clBLAS.cpp src/lib/value_scalar.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/parse.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/stream.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/model/model.cpp src/lib/model/database.cpp src/lib/model/predictors/random_forest.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/program.cpp src/lib/driver/device.cpp src/lib/driver/event.cpp
src/lib/driver/program_cache.cpp src/lib/driver/ndrange.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/command_queue.cpp src/lib/driver/check.cpp src/lib/driver/buffer.cpp src/lib/driver/backend.cpp src/lib/driver/context.cpp src/lib/driver/platform.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']] + src = 'src/lib/array.cpp src/lib/wrap/clBLAS.cpp src/lib/value_scalar.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/parse.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/stream.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/model/model.cpp src/lib/model/database.cpp src/lib/model/predictors/random_forest.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/program.cpp src/lib/driver/device.cpp src/lib/driver/event.cpp src/lib/driver/program_cache.cpp src/lib/driver/ndrange.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/command_queue.cpp src/lib/driver/check.cpp src/lib/driver/buffer.cpp src/lib/driver/backend.cpp src/lib/driver/context.cpp src/lib/driver/platform.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']] boostsrc = 'external/boost/libs/' for s in ['numpy','python','smart_ptr','system','thread']: src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]