Feature: Merged kernel-fusion branch

* Fuses multiple AXPY kernel
* Possibility to add thread-wise for loops in AXPY-like kernels
This commit is contained in:
Philippe Tillet
2015-09-30 15:31:41 -04:00
parent 149441b9e2
commit feeb1e9862
64 changed files with 10047 additions and 1119 deletions

View File

@@ -23,6 +23,14 @@ void fill(lhs_rhs_element & x, std::size_t node_index)
x.node_index = node_index;
}
void fill(lhs_rhs_element & x, for_idx_t index)
{
x.type_family = PLACEHOLDER_TYPE_FAMILY;
x.subtype = FOR_LOOP_INDEX_TYPE;
x.dtype = INVALID_NUMERIC_TYPE;
x.for_idx = index;
}
void fill(lhs_rhs_element & x, array const & a)
{
x.type_family = ARRAY_TYPE_FAMILY;
@@ -39,14 +47,6 @@ void fill(lhs_rhs_element & x, value_scalar const & v)
x.vscalar = v.values();
}
void fill(lhs_rhs_element & x, repeat_infos const & r)
{
x.type_family = INFOS_TYPE_FAMILY;
x.subtype = REPEAT_INFOS_TYPE;
x.dtype = INVALID_NUMERIC_TYPE;
x.tuple = r;
}
lhs_rhs_element::lhs_rhs_element(){}
//
@@ -54,9 +54,38 @@ op_element::op_element() {}
op_element::op_element(operation_node_type_family const & _type_family, operation_node_type const & _type) : type_family(_type_family), type(_type){}
//
math_expression::math_expression(for_idx_t const &lhs, for_idx_t const &rhs, const op_element &op)
: tree_(1), root_(0), context_(NULL), dtype_(INVALID_NUMERIC_TYPE), shape_(1)
{
fill(tree_[0].lhs, lhs);
tree_[0].op = op;
fill(tree_[0].rhs, rhs);
}
math_expression::math_expression(for_idx_t const &lhs, value_scalar const &rhs, const op_element &op, const numeric_type &dtype)
: tree_(1), root_(0), context_(NULL), dtype_(dtype), shape_(1)
{
fill(tree_[0].lhs, lhs);
tree_[0].op = op;
fill(tree_[0].rhs, rhs);
}
math_expression::math_expression(value_scalar const &lhs, for_idx_t const &rhs, const op_element &op, const numeric_type &dtype)
: tree_(1), root_(0), context_(NULL), dtype_(dtype), shape_(1)
{
fill(tree_[0].lhs, lhs);
tree_[0].op = op;
fill(tree_[0].rhs, rhs);
}
//math_expression(for_idx_t const &lhs, for_idx_t const &rhs, const op_element &op);
//math_expression(for_idx_t const &lhs, value_scalar const &rhs, const op_element &op, const numeric_type &dtype);
template<class LT, class RT>
array_expression::array_expression(LT const & lhs, RT const & rhs, op_element const & op, driver::Context const & context, numeric_type const & dtype, size4 const & shape) :
tree_(1), root_(0), context_(context), dtype_(dtype), shape_(shape)
math_expression::math_expression(LT const & lhs, RT const & rhs, op_element const & op, driver::Context const & context, numeric_type const & dtype, size4 const & shape) :
tree_(1), root_(0), context_(&context), dtype_(dtype), shape_(shape)
{
fill(tree_[0].lhs, lhs);
tree_[0].op = op;
@@ -64,8 +93,8 @@ array_expression::array_expression(LT const & lhs, RT const & rhs, op_element co
}
template<class RT>
array_expression::array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, driver::Context const & context, numeric_type const & dtype, size4 const & shape) :
tree_(lhs.tree_.size() + 1), root_(tree_.size()-1), context_(context), dtype_(dtype), shape_(shape)
math_expression::math_expression(math_expression const & lhs, RT const & rhs, op_element const & op, driver::Context const & context, numeric_type const & dtype, size4 const & shape) :
tree_(lhs.tree_.size() + 1), root_(tree_.size()-1), context_(&context), dtype_(dtype), shape_(shape)
{
std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin());
fill(tree_[root_].lhs, lhs.root_);
@@ -74,8 +103,8 @@ array_expression::array_expression(array_expression const & lhs, RT const & rhs,
}
template<class LT>
array_expression::array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, driver::Context const & context, numeric_type const & dtype, size4 const & shape) :
tree_(rhs.tree_.size() + 1), root_(tree_.size() - 1), context_(context), dtype_(dtype), shape_(shape)
math_expression::math_expression(LT const & lhs, math_expression const & rhs, op_element const & op, driver::Context const & context, numeric_type const & dtype, size4 const & shape) :
tree_(rhs.tree_.size() + 1), root_(tree_.size() - 1), context_(&context), dtype_(dtype), shape_(shape)
{
std::copy(rhs.tree_.begin(), rhs.tree_.end(), tree_.begin());
fill(tree_[root_].lhs, lhs);
@@ -83,8 +112,8 @@ array_expression::array_expression(LT const & lhs, array_expression const & rhs,
fill(tree_[root_].rhs, rhs.root_);
}
array_expression::array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, driver::Context const & context, numeric_type const & dtype, size4 const & shape):
tree_(lhs.tree_.size() + rhs.tree_.size() + 1), root_(tree_.size()-1), context_(context), dtype_(dtype), shape_(shape)
math_expression::math_expression(math_expression const & lhs, math_expression const & rhs, op_element const & op, driver::Context const & context, numeric_type const & dtype, size4 const & shape):
tree_(lhs.tree_.size() + rhs.tree_.size() + 1), root_(tree_.size()-1), context_(&context), dtype_(dtype), shape_(shape)
{
std::size_t lsize = lhs.tree_.size();
std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin());
@@ -99,111 +128,84 @@ array_expression::array_expression(array_expression const & lhs, array_expressio
root_ = tree_.size() - 1;
}
template array_expression::array_expression(array_expression const &, value_scalar const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array_expression const &, invalid_node const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array_expression const &, array const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array_expression const &, repeat_infos const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(math_expression const &, value_scalar const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(math_expression const &, invalid_node const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(math_expression const &, array const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(math_expression const &, for_idx_t const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(value_scalar const &, array_expression const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(invalid_node const &, array_expression const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array const &, array_expression const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(repeat_infos const &, array_expression const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(value_scalar const &, value_scalar const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(value_scalar const &, invalid_node const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(value_scalar const &, array const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(value_scalar const &, math_expression const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(value_scalar const &, value_scalar const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(invalid_node const &, value_scalar const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array const &, value_scalar const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(repeat_infos const &, value_scalar const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(invalid_node const &, value_scalar const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(invalid_node const &, math_expression const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(invalid_node const &, invalid_node const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(invalid_node const &, array const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(value_scalar const &, invalid_node const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(invalid_node const &, invalid_node const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array const &, invalid_node const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(repeat_infos const &, invalid_node const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(array const &, math_expression const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(array const &, value_scalar const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(array const &, invalid_node const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(array const &, array const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(array const &, for_idx_t const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(value_scalar const &, array const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(invalid_node const &, array const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array const &, array const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(repeat_infos const &, array const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(for_idx_t const &, math_expression const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template math_expression::math_expression(for_idx_t const &, array const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(value_scalar const &, repeat_infos const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(invalid_node const &, repeat_infos const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array const &, repeat_infos const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(repeat_infos const &, repeat_infos const &, op_element const &, driver::Context const &, numeric_type const &, size4 const &);
array_expression::container_type & array_expression::tree()
math_expression::container_type & math_expression::tree()
{ return tree_; }
array_expression::container_type const & array_expression::tree() const
math_expression::container_type const & math_expression::tree() const
{ return tree_; }
std::size_t array_expression::root() const
std::size_t math_expression::root() const
{ return root_; }
driver::Context const & array_expression::context() const
{ return context_; }
driver::Context const & math_expression::context() const
{ return *context_; }
numeric_type const & array_expression::dtype() const
numeric_type const & math_expression::dtype() const
{ return dtype_; }
size4 array_expression::shape() const
size4 math_expression::shape() const
{ return shape_; }
int_t array_expression::nshape() const
int_t math_expression::nshape() const
{ return int_t((shape_[0] > 1) + (shape_[1] > 1)); }
array_expression& array_expression::reshape(int_t size1, int_t size2)
math_expression& math_expression::reshape(int_t size1, int_t size2)
{
assert(size1*size2==prod(shape_));
shape_ = size4(size1, size2);
return *this;
}
array_expression array_expression::operator-()
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
math_expression math_expression::operator-()
{ return math_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), *context_, dtype_, shape_); }
array_expression array_expression::operator!()
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), context_, INT_TYPE, shape_); }
math_expression math_expression::operator!()
{ return math_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), *context_, INT_TYPE, shape_); }
//
std::shared_ptr<array_expression> expressions_tuple::create(array_expression const & s)
math_expression::node const & lhs_most(math_expression::container_type const & array, math_expression::node const & init)
{
return std::shared_ptr<array_expression>(new array_expression(static_cast<array_expression const &>(s)));
}
expressions_tuple::expressions_tuple(data_type const & data, order_type order) : data_(data), order_(order)
{ }
expressions_tuple::expressions_tuple(array_expression const & s0) : order_(INDEPENDENT)
{
data_.push_back(create(s0));
}
expressions_tuple::expressions_tuple(order_type order, array_expression const & s0, array_expression const & s1) : order_(order)
{
data_.push_back(create(s0));
data_.push_back(create(s1));
}
expressions_tuple::data_type const & expressions_tuple::data() const
{ return data_; }
driver::Context const & expressions_tuple::context() const
{ return data_.front()->context(); }
expressions_tuple::order_type expressions_tuple::order() const
{ return order_; }
array_expression::node const & lhs_most(array_expression::container_type const & array, array_expression::node const & init)
{
array_expression::node const * current = &init;
math_expression::node const * current = &init;
while (current->lhs.type_family==COMPOSITE_OPERATOR_FAMILY)
current = &array[current->lhs.node_index];
return *current;
}
array_expression::node const & lhs_most(array_expression::container_type const & array, size_t root)
math_expression::node const & lhs_most(math_expression::container_type const & array, size_t root)
{ return lhs_most(array, array[root]); }
//
math_expression for_idx_t::operator=(value_scalar const & r) const { return math_expression(*this, r, op_element(OPERATOR_BINARY_TYPE_FAMILY,OPERATOR_ASSIGN_TYPE), r.dtype()); }
math_expression for_idx_t::operator=(math_expression const & r) const { return math_expression(*this, r, op_element(OPERATOR_BINARY_TYPE_FAMILY,OPERATOR_ASSIGN_TYPE), r.context(), r.dtype(), r.shape()); }
math_expression for_idx_t::operator+=(value_scalar const & r) const { return *this = *this + r; }
math_expression for_idx_t::operator-=(value_scalar const & r) const { return *this = *this - r; }
math_expression for_idx_t::operator*=(value_scalar const & r) const { return *this = *this * r; }
math_expression for_idx_t::operator/=(value_scalar const & r) const { return *this = *this / r; }
}