2015-01-12 13:20:53 -05:00
|
|
|
#include <cassert>
|
|
|
|
|
|
|
|
#include "atidlas/array.h"
|
|
|
|
#include "atidlas/backend/templates/vaxpy.h"
|
|
|
|
#include "atidlas/backend/templates/reduction.h"
|
|
|
|
#include "atidlas/backend/templates/maxpy.h"
|
|
|
|
#include "atidlas/backend/templates/mreduction.h"
|
|
|
|
#include "atidlas/backend/templates/mproduct.h"
|
2015-01-17 10:48:02 -05:00
|
|
|
#include "atidlas/backend/templates/base.h"
|
2015-01-12 13:20:53 -05:00
|
|
|
#include "atidlas/backend/parse.h"
|
|
|
|
#include "atidlas/exception/operation_not_supported.h"
|
2015-01-19 21:29:47 -05:00
|
|
|
#include "atidlas/exception/unknown_datatype.h"
|
2015-01-12 13:20:53 -05:00
|
|
|
#include "atidlas/tools/to_string.hpp"
|
|
|
|
#include "atidlas/tools/make_map.hpp"
|
2015-01-17 10:48:02 -05:00
|
|
|
#include "atidlas/symbolic/io.h"
|
2015-01-12 13:20:53 -05:00
|
|
|
|
|
|
|
namespace atidlas
|
|
|
|
{
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
base::parameters_type::parameters_type(unsigned int _simd_width, int_t _local_size_1, int_t _local_size_2, int_t _num_kernels) : simd_width(_simd_width), local_size_0(_local_size_1), local_size_1(_local_size_2), num_kernels(_num_kernels)
|
2015-01-12 13:20:53 -05:00
|
|
|
{ }
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
numeric_type base::map_functor::get_numeric_type(atidlas::array_expression const * array_expression, int_t root_idx) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
2015-01-31 22:01:48 -05:00
|
|
|
array_expression::node const * root_node = &array_expression->tree()[root_idx];
|
2015-01-12 13:20:53 -05:00
|
|
|
while (root_node->lhs.dtype==INVALID_NUMERIC_TYPE)
|
2015-01-31 22:01:48 -05:00
|
|
|
root_node = &array_expression->tree()[root_node->lhs.node_index];
|
2015-01-12 13:20:53 -05:00
|
|
|
return root_node->lhs.dtype;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** @brief Binary leaf */
|
|
|
|
template<class T>
|
2015-01-31 22:01:48 -05:00
|
|
|
tools::shared_ptr<mapped_object> base::map_functor::binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
2015-01-31 22:01:48 -05:00
|
|
|
return tools::shared_ptr<mapped_object>(new T(numeric_type_to_string(array_expression->dtype()), binder_.get(NULL), mapped_object::node_info(mapping, array_expression, root_idx)));
|
2015-01-12 13:20:53 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/** @brief Scalar mapping */
|
2015-01-17 10:48:02 -05:00
|
|
|
tools::shared_ptr<mapped_object> base::map_functor::create(numeric_type dtype, values_holder) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
std::string strdtype = numeric_type_to_string(dtype);
|
|
|
|
return tools::shared_ptr<mapped_object>(new mapped_host_scalar(strdtype, binder_.get(NULL)));
|
|
|
|
}
|
|
|
|
|
|
|
|
/** @brief Vector mapping */
|
2015-01-18 14:52:45 -05:00
|
|
|
tools::shared_ptr<mapped_object> base::map_functor::create(array_infos const & a) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
2015-01-18 14:52:45 -05:00
|
|
|
std::string dtype = numeric_type_to_string(a.dtype);
|
|
|
|
unsigned int id = binder_.get(a.data);
|
2015-01-19 21:29:47 -05:00
|
|
|
//Scalar
|
2015-01-18 14:52:45 -05:00
|
|
|
if(a.shape1==1 && a.shape2==1)
|
2015-01-19 21:29:47 -05:00
|
|
|
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 's'));
|
|
|
|
//Column vector
|
|
|
|
else if(a.shape1>1 && a.shape2==1)
|
|
|
|
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'c'));
|
|
|
|
//Row vector
|
|
|
|
else if(a.shape1==1 && a.shape2>1)
|
|
|
|
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'r'));
|
|
|
|
//Matrix
|
2015-01-12 13:20:53 -05:00
|
|
|
else
|
2015-01-19 21:29:47 -05:00
|
|
|
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'm'));
|
2015-01-12 13:20:53 -05:00
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
tools::shared_ptr<mapped_object> base::map_functor::create(repeat_infos const &) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
//TODO: Make it less specific!
|
|
|
|
return tools::shared_ptr<mapped_object>(new mapped_tuple("int",binder_.get(NULL),4));
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
tools::shared_ptr<mapped_object> base::map_functor::create(lhs_rhs_element const & lhs_rhs) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
switch(lhs_rhs.type_family)
|
|
|
|
{
|
2015-01-18 14:52:45 -05:00
|
|
|
case INFOS_TYPE_FAMILY: return create(lhs_rhs.tuple);
|
2015-01-12 13:20:53 -05:00
|
|
|
case VALUE_TYPE_FAMILY: return create(lhs_rhs.dtype, lhs_rhs.vscalar);
|
2015-01-18 14:52:45 -05:00
|
|
|
case ARRAY_TYPE_FAMILY: return create(lhs_rhs.array);
|
2015-01-12 13:20:53 -05:00
|
|
|
default: throw "";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
base::map_functor::map_functor(symbolic_binder & binder, mapping_type & mapping) : binder_(binder), mapping_(mapping){ }
|
2015-01-12 13:20:53 -05:00
|
|
|
|
|
|
|
/** @brief Traversal functor */
|
2015-01-31 22:01:48 -05:00
|
|
|
void base::map_functor::operator()(atidlas::array_expression const & array_expression, int_t root_idx, leaf_t leaf_t) const {
|
2015-01-12 13:20:53 -05:00
|
|
|
mapping_type::key_type key(root_idx, leaf_t);
|
2015-01-31 22:01:48 -05:00
|
|
|
array_expression::node const & root_node = array_expression.tree()[root_idx];
|
2015-01-12 13:20:53 -05:00
|
|
|
|
|
|
|
if (leaf_t == LHS_NODE_TYPE && root_node.lhs.type_family != COMPOSITE_OPERATOR_FAMILY)
|
|
|
|
mapping_.insert(mapping_type::value_type(key, create(root_node.lhs)));
|
|
|
|
else if (leaf_t == RHS_NODE_TYPE && root_node.rhs.type_family != COMPOSITE_OPERATOR_FAMILY)
|
|
|
|
mapping_.insert(mapping_type::value_type(key, create(root_node.rhs)));
|
|
|
|
else if ( leaf_t== PARENT_NODE_TYPE)
|
|
|
|
{
|
2015-01-17 10:48:02 -05:00
|
|
|
if (root_node.op.type==OPERATOR_VDIAG_TYPE)
|
2015-01-31 22:01:48 -05:00
|
|
|
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_vdiag>(&array_expression, root_idx, &mapping_)));
|
2015-01-12 13:20:53 -05:00
|
|
|
else if (root_node.op.type==OPERATOR_MATRIX_DIAG_TYPE)
|
2015-01-31 22:01:48 -05:00
|
|
|
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_diag>(&array_expression, root_idx, &mapping_)));
|
2015-01-12 13:20:53 -05:00
|
|
|
else if (root_node.op.type==OPERATOR_MATRIX_ROW_TYPE)
|
2015-01-31 22:01:48 -05:00
|
|
|
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_row>(&array_expression, root_idx, &mapping_)));
|
2015-01-12 13:20:53 -05:00
|
|
|
else if (root_node.op.type==OPERATOR_MATRIX_COLUMN_TYPE)
|
2015-01-31 22:01:48 -05:00
|
|
|
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_column>(&array_expression, root_idx, &mapping_)));
|
2015-01-12 13:20:53 -05:00
|
|
|
else if (detail::is_scalar_reduction(root_node))
|
2015-01-31 22:01:48 -05:00
|
|
|
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_scalar_reduction>(&array_expression, root_idx, &mapping_)));
|
2015-01-12 13:20:53 -05:00
|
|
|
else if (detail::is_vector_reduction(root_node))
|
2015-01-31 22:01:48 -05:00
|
|
|
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_mreduction>(&array_expression, root_idx, &mapping_)));
|
2015-01-12 13:20:53 -05:00
|
|
|
else if (root_node.op.type_family == OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY)
|
2015-01-31 22:01:48 -05:00
|
|
|
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_mproduct>(&array_expression, root_idx, &mapping_)));
|
2015-01-17 10:48:02 -05:00
|
|
|
else if (root_node.op.type == OPERATOR_REPEAT_TYPE)
|
2015-01-31 22:01:48 -05:00
|
|
|
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_repeat>(&array_expression, root_idx, &mapping_)));
|
2015-01-17 10:48:02 -05:00
|
|
|
else if (root_node.op.type == OPERATOR_OUTER_PROD_TYPE)
|
2015-01-31 22:01:48 -05:00
|
|
|
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_outer>(&array_expression, root_idx, &mapping_)));
|
2015-01-29 01:00:50 -05:00
|
|
|
else if (detail::is_cast(root_node.op))
|
|
|
|
mapping_.insert(mapping_type::value_type(key, tools::shared_ptr<mapped_object>(new mapped_cast(root_node.op.type, binder_.get(NULL)))));
|
2015-01-12 13:20:53 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
base::set_arguments_functor::set_arguments_functor(symbolic_binder & binder, unsigned int & current_arg, cl::Kernel & kernel) :
|
2015-01-12 13:20:53 -05:00
|
|
|
binder_(binder), current_arg_(current_arg), kernel_(kernel){ }
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
void base::set_arguments_functor::set_arguments(numeric_type dtype, values_holder const & scal) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
switch(dtype)
|
|
|
|
{
|
2015-01-29 15:19:40 -05:00
|
|
|
// case BOOL_TYPE: kernel_.setArg(current_arg_++, scal.bool8); break;
|
2015-01-12 13:20:53 -05:00
|
|
|
case CHAR_TYPE: kernel_.setArg(current_arg_++, scal.int8); break;
|
|
|
|
case UCHAR_TYPE: kernel_.setArg(current_arg_++, scal.uint8); break;
|
|
|
|
case SHORT_TYPE: kernel_.setArg(current_arg_++, scal.int16); break;
|
|
|
|
case USHORT_TYPE: kernel_.setArg(current_arg_++, scal.uint16); break;
|
|
|
|
case INT_TYPE: kernel_.setArg(current_arg_++, scal.int32); break;
|
|
|
|
case UINT_TYPE: kernel_.setArg(current_arg_++, scal.uint32); break;
|
|
|
|
case LONG_TYPE: kernel_.setArg(current_arg_++, scal.int64); break;
|
|
|
|
case ULONG_TYPE: kernel_.setArg(current_arg_++, scal.uint64); break;
|
2015-01-29 15:19:40 -05:00
|
|
|
// case HALF_TYPE: kernel_.setArg(current_arg_++, scal.float16); break;
|
2015-01-12 13:20:53 -05:00
|
|
|
case FLOAT_TYPE: kernel_.setArg(current_arg_++, scal.float32); break;
|
|
|
|
case DOUBLE_TYPE: kernel_.setArg(current_arg_++, scal.float64); break;
|
2015-01-28 17:08:39 -05:00
|
|
|
default: throw unknown_datatype(dtype);
|
2015-01-12 13:20:53 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/** @brief Vector mapping */
|
2015-01-18 14:52:45 -05:00
|
|
|
void base::set_arguments_functor::set_arguments(array_infos const & x) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
2015-01-18 14:52:45 -05:00
|
|
|
bool is_bound = binder_.bind(x.data);
|
2015-01-12 13:20:53 -05:00
|
|
|
if (is_bound)
|
|
|
|
{
|
2015-01-19 21:29:47 -05:00
|
|
|
kernel_.setArg(current_arg_++, x.data);
|
2015-01-12 13:20:53 -05:00
|
|
|
//scalar
|
2015-01-18 14:52:45 -05:00
|
|
|
if(x.shape1==1 && x.shape2==1)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
2015-01-19 21:29:47 -05:00
|
|
|
kernel_.setArg(current_arg_++, cl_uint(x.start1));
|
2015-01-12 13:20:53 -05:00
|
|
|
}
|
|
|
|
//array
|
2015-01-19 21:29:47 -05:00
|
|
|
else if(x.shape1==1 || x.shape2==1)
|
|
|
|
{
|
|
|
|
kernel_.setArg(current_arg_++, cl_uint(std::max(x.start1, x.start2)));
|
|
|
|
kernel_.setArg(current_arg_++, cl_uint(std::max(x.stride1, x.stride2)));
|
|
|
|
}
|
2015-01-12 13:20:53 -05:00
|
|
|
else
|
|
|
|
{
|
2015-01-19 21:29:47 -05:00
|
|
|
kernel_.setArg(current_arg_++, cl_uint(x.ld));
|
|
|
|
kernel_.setArg(current_arg_++, cl_uint(x.start1));
|
|
|
|
kernel_.setArg(current_arg_++, cl_uint(x.start2));
|
|
|
|
kernel_.setArg(current_arg_++, cl_uint(x.stride1));
|
|
|
|
kernel_.setArg(current_arg_++, cl_uint(x.stride2));
|
2015-01-12 13:20:53 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
void base::set_arguments_functor::set_arguments(repeat_infos const & i) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
2015-01-18 14:52:45 -05:00
|
|
|
kernel_.setArg(current_arg_++, cl_uint(i.sub1));
|
|
|
|
kernel_.setArg(current_arg_++, cl_uint(i.sub2));
|
|
|
|
kernel_.setArg(current_arg_++, cl_uint(i.rep1));
|
|
|
|
kernel_.setArg(current_arg_++, cl_uint(i.rep2));
|
2015-01-12 13:20:53 -05:00
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
void base::set_arguments_functor::set_arguments(lhs_rhs_element const & lhs_rhs) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
switch(lhs_rhs.type_family)
|
|
|
|
{
|
|
|
|
case VALUE_TYPE_FAMILY: return set_arguments(lhs_rhs.dtype, lhs_rhs.vscalar);
|
2015-01-18 14:52:45 -05:00
|
|
|
case ARRAY_TYPE_FAMILY: return set_arguments(lhs_rhs.array);
|
|
|
|
case INFOS_TYPE_FAMILY: return set_arguments(lhs_rhs.tuple);
|
2015-01-28 17:08:39 -05:00
|
|
|
default: throw invalid_exception("Unrecognized type family");
|
2015-01-12 13:20:53 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/** @brief Traversal functor: */
|
2015-01-31 22:01:48 -05:00
|
|
|
void base::set_arguments_functor::operator()(atidlas::array_expression const & array_expression, int_t root_idx, leaf_t leaf_t) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
2015-01-31 22:01:48 -05:00
|
|
|
array_expression::node const & root_node = array_expression.tree()[root_idx];
|
2015-01-12 13:20:53 -05:00
|
|
|
if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != COMPOSITE_OPERATOR_FAMILY)
|
|
|
|
set_arguments(root_node.lhs);
|
|
|
|
else if (leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != COMPOSITE_OPERATOR_FAMILY)
|
|
|
|
set_arguments(root_node.rhs);
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
void base::compute_reduction(kernel_generation_stream & os, std::string acc, std::string cur, op_element const & op)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
if (detail::is_elementwise_function(op))
|
|
|
|
os << acc << "=" << evaluate(op.type) << "(" << acc << "," << cur << ");" << std::endl;
|
|
|
|
else
|
|
|
|
os << acc << "= (" << acc << ")" << evaluate(op.type) << "(" << cur << ");" << std::endl;
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
void base::compute_index_reduction(kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, op_element const & op)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
// os << acc << " = " << cur_value << ">" << acc_value << "?" << cur << ":" << acc << ";" << std::endl;
|
|
|
|
os << acc << "= select(" << acc << "," << cur << "," << cur_value << ">" << acc_value << ");" << std::endl;
|
|
|
|
os << acc_value << "=";
|
|
|
|
if (op.type==OPERATOR_ELEMENT_ARGFMAX_TYPE) os << "fmax";
|
|
|
|
if (op.type==OPERATOR_ELEMENT_ARGMAX_TYPE) os << "max";
|
|
|
|
if (op.type==OPERATOR_ELEMENT_ARGFMIN_TYPE) os << "fmin";
|
|
|
|
if (op.type==OPERATOR_ELEMENT_ARGMIN_TYPE) os << "min";
|
|
|
|
os << "(" << acc_value << "," << cur_value << ");"<< std::endl;
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
void base::process_all(std::string const & type_key, std::string const & str,
|
2015-01-12 13:20:53 -05:00
|
|
|
kernel_generation_stream & stream, std::vector<mapping_type> const & mappings)
|
|
|
|
{
|
|
|
|
for (std::vector<mapping_type>::const_iterator mit = mappings.begin(); mit != mappings.end(); ++mit)
|
|
|
|
for (mapping_type::const_iterator mmit = mit->begin(); mmit != mit->end(); ++mmit)
|
|
|
|
if (mmit->second->type_key()==type_key)
|
|
|
|
stream << mmit->second->process(str) << std::endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
void base::base::process_all_at(std::string const & type_key, std::string const & str,
|
2015-01-12 13:20:53 -05:00
|
|
|
kernel_generation_stream & stream, std::vector<mapping_type> const & mappings,
|
|
|
|
size_t root_idx, leaf_t leaf)
|
|
|
|
{
|
|
|
|
for (std::vector<mapping_type>::const_iterator mit = mappings.begin(); mit != mappings.end(); ++mit)
|
|
|
|
{
|
|
|
|
mapped_object * obj = mit->at(mapping_key(root_idx, leaf)).get();
|
|
|
|
if (obj->type_key()==type_key)
|
|
|
|
stream << obj->process(str) << std::endl;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
std::string base::neutral_element(op_element const & op)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
switch (op.type)
|
|
|
|
{
|
|
|
|
case OPERATOR_ADD_TYPE : return "0";
|
|
|
|
case OPERATOR_MULT_TYPE : return "1";
|
|
|
|
case OPERATOR_DIV_TYPE : return "1";
|
|
|
|
case OPERATOR_ELEMENT_FMAX_TYPE : return "-INFINITY";
|
|
|
|
case OPERATOR_ELEMENT_ARGFMAX_TYPE : return "-INFINITY";
|
|
|
|
case OPERATOR_ELEMENT_MAX_TYPE : return "-INFINITY";
|
|
|
|
case OPERATOR_ELEMENT_ARGMAX_TYPE : return "-INFINITY";
|
|
|
|
case OPERATOR_ELEMENT_FMIN_TYPE : return "INFINITY";
|
|
|
|
case OPERATOR_ELEMENT_ARGFMIN_TYPE : return "INFINITY";
|
|
|
|
case OPERATOR_ELEMENT_MIN_TYPE : return "INFINITY";
|
|
|
|
case OPERATOR_ELEMENT_ARGMIN_TYPE : return "INFINITY";
|
|
|
|
|
|
|
|
default: throw operation_not_supported_exception("Unsupported reduction operator : no neutral element known");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
std::string base::generate_arguments(std::vector<mapping_type> const & mappings, std::map<std::string, std::string> const & accessors, array_expressions_container const & array_expressions)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
kernel_generation_stream stream;
|
2015-01-31 22:01:48 -05:00
|
|
|
process(stream, PARENT_NODE_TYPE, accessors, array_expressions, mappings);
|
2015-01-12 13:20:53 -05:00
|
|
|
std::string res = stream.str();
|
|
|
|
res.erase(res.rfind(','));
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
std::string base::generate_arguments(std::string const & data_type, std::vector<mapping_type> const & mappings, array_expressions_container const & array_expressions)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
2015-01-19 21:29:47 -05:00
|
|
|
return generate_arguments(mappings, tools::make_map<std::map<std::string, std::string> >("array0", "__global #scalartype* #pointer, uint #start,")
|
2015-01-12 13:20:53 -05:00
|
|
|
("host_scalar", "#scalartype #name,")
|
2015-01-17 15:47:52 -05:00
|
|
|
("array1", "__global " + data_type + "* #pointer, uint #start, uint #stride,")
|
|
|
|
("array2", "__global " + data_type + "* #pointer, uint #ld, uint #start1, uint #start2, uint #stride1, uint #stride2,")
|
2015-01-31 22:01:48 -05:00
|
|
|
("tuple4", "#scalartype #name0, #scalartype #name1, #scalartype #name2, #scalartype #name3,"), array_expressions);
|
2015-01-12 13:20:53 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
void base::set_arguments(array_expressions_container const & array_expressions, cl::Kernel & kernel, unsigned int & current_arg)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
tools::shared_ptr<symbolic_binder> binder = make_binder();
|
2015-01-31 22:01:48 -05:00
|
|
|
for (array_expressions_container::data_type::const_iterator itt = array_expressions.data().begin(); itt != array_expressions.data().end(); ++itt)
|
2015-01-12 13:20:53 -05:00
|
|
|
traverse(**itt, (*itt)->root(), set_arguments_functor(*binder, current_arg, kernel), true);
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
void base::fill_kernel_name(char * ptr, unsigned int label, const char * suffix)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
*ptr++='k';
|
|
|
|
if (label==0)
|
|
|
|
*ptr++='0';
|
|
|
|
else
|
|
|
|
while (label>0)
|
|
|
|
{
|
|
|
|
*ptr++= (char)('0' + (label % 10));
|
|
|
|
label /= 10;
|
|
|
|
}
|
|
|
|
for(std::size_t i = 0 ; i < strlen(suffix);++i)
|
|
|
|
*ptr++=suffix[i];
|
|
|
|
*ptr++='\0';
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
base::invalid_exception::invalid_exception() : message_() {}
|
2015-01-12 13:20:53 -05:00
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
base::invalid_exception::invalid_exception(std::string message) :
|
2015-01-31 22:01:48 -05:00
|
|
|
message_("ViennaCL: Internal error: The generator cannot apply the given template to the given array_expression: " + message + "\n"
|
2015-01-12 13:20:53 -05:00
|
|
|
"If you are using a builtin template, please report on viennacl-support@lists.sourceforge.net! We will provide a fix as soon as possible\n"
|
|
|
|
"If you are using your own template, please try using other parameters") {}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
const char* base::invalid_exception::what() const throw() { return message_.c_str(); }
|
2015-01-12 13:20:53 -05:00
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
base::invalid_exception::~invalid_exception() throw() {}
|
2015-01-12 13:20:53 -05:00
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
void base::fetching_loop_info(fetching_policy_type policy, std::string const & bound, kernel_generation_stream & stream, std::string & init, std::string & upper_bound, std::string & inc, std::string const & domain_id, std::string const & domain_size)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
if (policy==FETCH_FROM_GLOBAL_STRIDED)
|
|
|
|
{
|
|
|
|
init = domain_id;
|
|
|
|
upper_bound = bound;
|
|
|
|
inc = domain_size;
|
|
|
|
}
|
|
|
|
else if (policy==FETCH_FROM_GLOBAL_CONTIGUOUS)
|
|
|
|
{
|
|
|
|
std::string chunk_size = "chunk_size";
|
|
|
|
std::string chunk_start = "chunk_start";
|
|
|
|
std::string chunk_end = "chunk_end";
|
|
|
|
|
|
|
|
stream << "unsigned int " << chunk_size << " = (" << bound << "+" << domain_size << "-1)/" << domain_size << ";" << std::endl;
|
|
|
|
stream << "unsigned int " << chunk_start << " =" << domain_id << "*" << chunk_size << ";" << std::endl;
|
|
|
|
stream << "unsigned int " << chunk_end << " = min(" << chunk_start << "+" << chunk_size << ", " << bound << ");" << std::endl;
|
|
|
|
init = chunk_start;
|
|
|
|
upper_bound = chunk_end;
|
|
|
|
inc = "1";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
bool base::is_node_trans(array_expression::container_type const & array, size_t root_idx, leaf_t leaf_type)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
bool res = false;
|
2015-01-31 22:01:48 -05:00
|
|
|
lhs_rhs_element array_expression::node::*ptr;
|
2015-01-12 13:20:53 -05:00
|
|
|
if (leaf_type==LHS_NODE_TYPE)
|
2015-01-31 22:01:48 -05:00
|
|
|
ptr = &array_expression::node::lhs;
|
2015-01-12 13:20:53 -05:00
|
|
|
else
|
2015-01-31 22:01:48 -05:00
|
|
|
ptr = &array_expression::node::rhs;
|
|
|
|
array_expression::node const * node = &array[root_idx];
|
2015-01-12 13:20:53 -05:00
|
|
|
while ((node->*ptr).type_family==COMPOSITE_OPERATOR_FAMILY)
|
|
|
|
{
|
|
|
|
if (array[(node->*ptr).node_index].op.type==OPERATOR_TRANS_TYPE)
|
|
|
|
res = !res;
|
|
|
|
node = &array[(node->*ptr).node_index];
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
std::string base::append_simd_suffix(std::string const & str, unsigned int i)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
assert(i < 16);
|
|
|
|
char suffixes[] = {'0','1','2','3','4','5','6','7','8','9',
|
|
|
|
'a','b','c','d','e','f'};
|
|
|
|
return str + tools::to_string(suffixes[i]);
|
|
|
|
}
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
bool base::is_strided(array_expression::node const & node)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
2015-01-17 10:48:02 -05:00
|
|
|
return node.op.type==OPERATOR_VDIAG_TYPE
|
2015-01-12 13:20:53 -05:00
|
|
|
|| node.op.type==OPERATOR_MATRIX_DIAG_TYPE
|
|
|
|
|| node.op.type==OPERATOR_MATRIX_ROW_TYPE
|
2015-01-17 10:48:02 -05:00
|
|
|
|| node.op.type==OPERATOR_MATRIX_COLUMN_TYPE
|
|
|
|
|| node.op.type==OPERATOR_OUTER_PROD_TYPE;
|
2015-01-12 13:20:53 -05:00
|
|
|
}
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
bool base::requires_fallback(array_expressions_container const & array_expressions)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
2015-01-31 22:01:48 -05:00
|
|
|
for (array_expressions_container::data_type::const_iterator it = array_expressions.data().begin(); it != array_expressions.data().end(); ++it)
|
|
|
|
for(array_expression::container_type::const_iterator itt = (*it)->tree().begin(); itt != (*it)->tree().end() ; ++itt)
|
2015-01-28 22:07:09 -05:00
|
|
|
if( (itt->lhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->lhs.array.stride1, itt->lhs.array.stride2)>1 || std::max(itt->lhs.array.start1,itt->lhs.array.start2)>0))
|
|
|
|
|| (itt->rhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->rhs.array.stride1, itt->rhs.array.stride2)>1 || std::max(itt->rhs.array.start1,itt->rhs.array.start2)>0)))
|
2015-01-12 13:20:53 -05:00
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
int_t base::vector_size(array_expression::node const & node)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
using namespace tools;
|
|
|
|
if (node.op.type==OPERATOR_MATRIX_DIAG_TYPE)
|
2015-01-18 14:52:45 -05:00
|
|
|
return std::min<int_t>(node.lhs.array.shape1, node.lhs.array.shape2);
|
2015-01-12 13:20:53 -05:00
|
|
|
else if (node.op.type==OPERATOR_MATRIX_ROW_TYPE)
|
2015-01-18 14:52:45 -05:00
|
|
|
return node.lhs.array.shape2;
|
2015-01-12 13:20:53 -05:00
|
|
|
else if (node.op.type==OPERATOR_MATRIX_COLUMN_TYPE)
|
2015-01-18 14:52:45 -05:00
|
|
|
return node.lhs.array.shape1;
|
2015-01-12 13:20:53 -05:00
|
|
|
else
|
2015-01-18 14:52:45 -05:00
|
|
|
return std::max(node.lhs.array.shape1, node.lhs.array.shape2);
|
2015-01-12 13:20:53 -05:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
std::pair<int_t, int_t> base::matrix_size(array_expression::node const & node)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
2015-01-17 10:48:02 -05:00
|
|
|
if (node.op.type==OPERATOR_VDIAG_TYPE)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
2015-01-18 14:52:45 -05:00
|
|
|
int_t size = node.lhs.array.shape1;
|
2015-01-12 13:20:53 -05:00
|
|
|
return std::make_pair(size,size);
|
|
|
|
}
|
2015-01-17 10:48:02 -05:00
|
|
|
else if(node.op.type==OPERATOR_REPEAT_TYPE)
|
2015-01-18 14:52:45 -05:00
|
|
|
return std::make_pair(node.lhs.array.shape1*node.rhs.tuple.rep1, node.lhs.array.shape2*node.rhs.tuple.rep2);
|
2015-01-12 13:20:53 -05:00
|
|
|
else
|
2015-01-18 14:52:45 -05:00
|
|
|
return std::make_pair(node.lhs.array.shape1,node.lhs.array.shape2);
|
2015-01-12 13:20:53 -05:00
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
void base::element_wise_loop_1D(kernel_generation_stream & stream, loop_body_base const & loop_body,
|
2015-01-12 13:20:53 -05:00
|
|
|
fetching_policy_type fetch, unsigned int simd_width, std::string const & i, std::string const & bound, std::string const & domain_id, std::string const & domain_size)
|
|
|
|
{
|
|
|
|
std::string strwidth = tools::to_string(simd_width);
|
|
|
|
std::string boundround = bound + "/" + strwidth;
|
|
|
|
|
|
|
|
std::string init, upper_bound, inc;
|
|
|
|
fetching_loop_info(fetch, boundround, stream, init, upper_bound, inc, domain_id, domain_size);
|
|
|
|
stream << "for(unsigned int " << i << " = " << init << "; " << i << " < " << upper_bound << "; " << i << " += " << inc << ")" << std::endl;
|
|
|
|
stream << "{" << std::endl;
|
|
|
|
stream.inc_tab();
|
|
|
|
loop_body(stream, simd_width);
|
|
|
|
stream.dec_tab();
|
|
|
|
stream << "}" << std::endl;
|
|
|
|
|
|
|
|
if (simd_width>1)
|
|
|
|
{
|
|
|
|
stream << "for(unsigned int " << i << " = " << boundround << "*" << strwidth << " + " << domain_id << "; " << i << " < " << bound << "; " << i << " += " + domain_size + ")" << std::endl;
|
|
|
|
stream << "{" << std::endl;
|
|
|
|
stream.inc_tab();
|
|
|
|
loop_body(stream, 1);
|
|
|
|
stream.dec_tab();
|
|
|
|
stream << "}" << std::endl;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
bool base::is_reduction(array_expression::node const & node)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
return node.op.type_family==OPERATOR_VECTOR_REDUCTION_TYPE_FAMILY
|
|
|
|
|| node.op.type_family==OPERATOR_COLUMNS_REDUCTION_TYPE_FAMILY
|
|
|
|
|| node.op.type_family==OPERATOR_ROWS_REDUCTION_TYPE_FAMILY;
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
bool base::is_index_reduction(op_element const & op)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
return op.type==OPERATOR_ELEMENT_ARGFMAX_TYPE
|
|
|
|
|| op.type==OPERATOR_ELEMENT_ARGMAX_TYPE
|
|
|
|
|| op.type==OPERATOR_ELEMENT_ARGFMIN_TYPE
|
|
|
|
|| op.type==OPERATOR_ELEMENT_ARGMIN_TYPE;
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
std::string base::vstore(unsigned int simd_width, std::string const & value, std::string const & offset, std::string const & ptr)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
if (simd_width==1)
|
|
|
|
return "(" + ptr + ")[" + offset + "] = " + value;
|
|
|
|
else
|
|
|
|
return append_width("vstore", simd_width) + "(" + value + ", " + offset + ", " + ptr + ")";
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
std::string base::vload(unsigned int simd_width, std::string const & offset, std::string const & ptr)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
if (simd_width==1)
|
|
|
|
return "(" + ptr + ")[" + offset + "]";
|
|
|
|
else
|
|
|
|
return append_width("vload", simd_width) + "(" + offset + ", " + ptr + ")";
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
std::string base::append_width(std::string const & str, unsigned int width)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
if (width==1)
|
|
|
|
return str;
|
|
|
|
return str + tools::to_string(width);
|
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
unsigned int base::align(unsigned int to_round, unsigned int base)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
if (to_round % base == 0)
|
|
|
|
return to_round;
|
|
|
|
return (to_round + base - 1)/base * base;
|
|
|
|
}
|
|
|
|
|
2015-01-18 14:52:45 -05:00
|
|
|
tools::shared_ptr<symbolic_binder> base::make_binder()
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
if (binding_policy_==BIND_TO_HANDLE)
|
|
|
|
return tools::shared_ptr<symbolic_binder>(new bind_to_handle());
|
|
|
|
else
|
|
|
|
return tools::shared_ptr<symbolic_binder>(new bind_all_unique());
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
base::base(binding_policy_t binding_policy) : binding_policy_(binding_policy)
|
2015-01-12 13:20:53 -05:00
|
|
|
{}
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
unsigned int base::lmem_usage(array_expressions_container const &) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{ return 0; }
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
unsigned int base::registers_usage(array_expressions_container const &) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{ return 0; }
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
base::~base()
|
2015-01-12 13:20:53 -05:00
|
|
|
{ }
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
std::vector<std::string> base::generate(unsigned int label, array_expressions_container const & array_expressions, cl::Device const & device)
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
2015-01-31 22:01:48 -05:00
|
|
|
array_expressions_container::data_type::const_iterator sit;
|
2015-01-12 13:20:53 -05:00
|
|
|
std::vector<mapping_type>::iterator mit;
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
if(int err = check_invalid(array_expressions, device))
|
2015-01-12 13:20:53 -05:00
|
|
|
throw operation_not_supported_exception("The supplied parameters for this template are invalid : err " + tools::to_string(err));
|
|
|
|
|
|
|
|
//Create mapping
|
2015-01-31 22:01:48 -05:00
|
|
|
std::vector<mapping_type> mappings(array_expressions.data().size());
|
2015-01-12 13:20:53 -05:00
|
|
|
tools::shared_ptr<symbolic_binder> binder = make_binder();
|
2015-01-31 22:01:48 -05:00
|
|
|
for (mit = mappings.begin(), sit = array_expressions.data().begin(); sit != array_expressions.data().end(); ++sit, ++mit)
|
2015-01-12 13:20:53 -05:00
|
|
|
traverse(**sit, (*sit)->root(), map_functor(*binder,*mit), true);
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
return generate_impl(label, array_expressions, mappings);
|
2015-01-12 13:20:53 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
template<class TType, class PType>
|
2015-01-31 22:01:48 -05:00
|
|
|
int base_impl<TType, PType>::check_invalid_impl(cl::Device const &, array_expressions_container const &) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{ return TEMPLATE_VALID; }
|
|
|
|
|
|
|
|
template<class TType, class PType>
|
2015-01-17 10:48:02 -05:00
|
|
|
base_impl<TType, PType>::base_impl(parameters_type const & parameters, binding_policy_t binding_policy) : base(binding_policy), p_(parameters)
|
2015-01-12 13:20:53 -05:00
|
|
|
{ }
|
|
|
|
|
|
|
|
template<class TType, class PType>
|
2015-01-17 10:48:02 -05:00
|
|
|
int_t base_impl<TType, PType>::local_size_0() const
|
2015-01-12 13:20:53 -05:00
|
|
|
{ return p_.local_size_0; }
|
|
|
|
|
|
|
|
template<class TType, class PType>
|
2015-01-17 10:48:02 -05:00
|
|
|
int_t base_impl<TType, PType>::local_size_1() const
|
2015-01-12 13:20:53 -05:00
|
|
|
{ return p_.local_size_1; }
|
|
|
|
|
|
|
|
template<class TType, class PType>
|
2015-01-17 10:48:02 -05:00
|
|
|
tools::shared_ptr<base> base_impl<TType, PType>::clone() const
|
|
|
|
{ return tools::shared_ptr<base>(new TType(*dynamic_cast<TType const *>(this))); }
|
2015-01-12 13:20:53 -05:00
|
|
|
|
|
|
|
template<class TType, class PType>
|
2015-01-31 22:01:48 -05:00
|
|
|
int base_impl<TType, PType>::check_invalid(array_expressions_container const & array_expressions, cl::Device const & device) const
|
2015-01-12 13:20:53 -05:00
|
|
|
{
|
|
|
|
//Query device informations
|
|
|
|
size_t lmem_available = device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
|
2015-01-31 22:01:48 -05:00
|
|
|
size_t lmem_used = lmem_usage(array_expressions);
|
2015-01-12 13:20:53 -05:00
|
|
|
if (lmem_used>lmem_available)
|
|
|
|
return TEMPLATE_LOCAL_MEMORY_OVERFLOW;
|
|
|
|
|
|
|
|
//Invalid work group size
|
|
|
|
size_t max_workgroup_size = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
|
|
|
|
std::vector<size_t> max_work_item_sizes = device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
|
|
|
|
if (p_.local_size_0*p_.local_size_1 > max_workgroup_size)
|
|
|
|
return TEMPLATE_WORK_GROUP_SIZE_OVERFLOW;
|
|
|
|
if (p_.local_size_0 > max_work_item_sizes[0])
|
|
|
|
return TEMPLATE_LOCAL_SIZE_0_OVERFLOW;
|
|
|
|
|
|
|
|
if (p_.local_size_1 > max_work_item_sizes[1])
|
|
|
|
return TEMPLATE_LOCAL_SIZE_1_OVERFLOW;
|
|
|
|
|
|
|
|
//Advice from the Intel guide
|
|
|
|
unsigned int warp_size = 8;
|
|
|
|
if (device.getInfo<CL_DEVICE_TYPE>()==CL_DEVICE_TYPE_GPU)
|
|
|
|
{
|
|
|
|
//Advice from the nvidia guide
|
|
|
|
warp_size = 32;
|
|
|
|
//Advice from the AMD guide
|
|
|
|
if (device.getInfo<CL_DEVICE_VENDOR_ID>()==4098)
|
|
|
|
warp_size = 64;
|
|
|
|
}
|
|
|
|
if (((p_.local_size_0*p_.local_size_1)%warp_size)>0)
|
|
|
|
return TEMPLATE_LOCAL_SIZE_NOT_WARP_MULTIPLE;
|
|
|
|
|
|
|
|
//Invalid SIMD Width
|
|
|
|
if (p_.simd_width!=1 && p_.simd_width!=2 &&
|
|
|
|
p_.simd_width!=4 && p_.simd_width!=8 &&
|
|
|
|
p_.simd_width!=16)
|
|
|
|
return TEMPLATE_INVALID_SIMD_WIDTH;
|
|
|
|
|
2015-01-31 22:01:48 -05:00
|
|
|
return check_invalid_impl(device, array_expressions);
|
2015-01-12 13:20:53 -05:00
|
|
|
}
|
|
|
|
|
2015-01-17 10:48:02 -05:00
|
|
|
template class base_impl<vaxpy, vaxpy_parameters>;
|
|
|
|
template class base_impl<reduction, reduction_parameters>;
|
|
|
|
template class base_impl<maxpy, maxpy_parameters>;
|
|
|
|
template class base_impl<mreduction, mreduction_parameters>;
|
|
|
|
template class base_impl<mproduct, mproduct_parameters>;
|
2015-01-12 13:20:53 -05:00
|
|
|
|
|
|
|
}
|