Some renaming; lower overhead in benchmark
This commit is contained in:
@@ -303,12 +303,12 @@ std::string evaluate(leaf_t leaf, std::map<std::string, std::string> const & acc
|
||||
}
|
||||
|
||||
// Writes one evaluated statement per expression into `stream`: for every entry of the
// expression container it calls the per-expression evaluate(...) overload with the
// matching mapping and appends ";" followed by std::endl.
// NOTE(review): this is a rendered diff - each adjacent `array_expressions*` / `expressions*`
// pair below appears to be the old and new spelling of the same line, not two statements.
void evaluate(kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors,
|
||||
array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings)
|
||||
expressions_tuple const & expressions, std::vector<mapping_type> const & mappings)
|
||||
{
|
||||
array_expressions_container::data_type::const_iterator sit;
|
||||
expressions_tuple::data_type::const_iterator sit;
|
||||
std::vector<mapping_type>::const_iterator mit;
|
||||
|
||||
// `mit` and `sit` advance in lockstep, but only `sit` is compared against its end;
// assumes `mappings` holds at least one entry per expression - TODO confirm at call sites.
for (mit = mappings.begin(), sit = array_expressions.data().begin(); sit != array_expressions.data().end(); ++mit, ++sit)
|
||||
for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++mit, ++sit)
|
||||
stream << evaluate(leaf, accessors, **sit, (*sit)->root(), *mit) << ";" << std::endl;
|
||||
}
|
||||
|
||||
@@ -368,13 +368,13 @@ void process(kernel_generation_stream & stream, leaf_t leaf, std::map<std::strin
|
||||
}
|
||||
|
||||
// Runs the per-expression process(...) overload for every expression in the container,
// pairing each expression with the mapping at the same position.
// NOTE(review): this is a rendered diff - each adjacent `array_expressions*` / `expressions*`
// pair below appears to be the old and new spelling of the same line, not two statements.
void process(kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors,
|
||||
array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings)
|
||||
expressions_tuple const & expressions, std::vector<mapping_type> const & mappings)
|
||||
{
|
||||
array_expressions_container::data_type::const_iterator sit;
|
||||
expressions_tuple::data_type::const_iterator sit;
|
||||
std::vector<mapping_type>::const_iterator mit;
|
||||
// A single set is shared by all iterations below; presumably the callee records what it
// has already emitted so repeated operands are handled once - verify against the callee.
std::set<std::string> already_processed;
|
||||
|
||||
// Only `sit` is bounds-checked; assumes `mappings` is at least as long as the
// expression container - TODO confirm.
for (mit = mappings.begin(), sit = array_expressions.data().begin(); sit != array_expressions.data().end(); ++mit, ++sit)
|
||||
for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++mit, ++sit)
|
||||
process(stream, leaf, accessors, **sit, (*sit)->root(), *mit, already_processed);
|
||||
}
|
||||
|
||||
|
@@ -258,30 +258,30 @@ std::string base::neutral_element(op_element const & op)
|
||||
}
|
||||
}
|
||||
|
||||
// Builds an argument-list string: process(...) is run with PARENT_NODE_TYPE and the given
// accessors into a local stream, then the text from the last ',' to the end is removed.
// NOTE(review): this is a rendered diff - each adjacent `array_expressions*` / `expressions*`
// pair below appears to be the old and new spelling of the same line, not two statements.
std::string base::generate_arguments(std::vector<mapping_type> const & mappings, std::map<std::string, std::string> const & accessors, array_expressions_container const & array_expressions)
|
||||
std::string base::generate_arguments(std::vector<mapping_type> const & mappings, std::map<std::string, std::string> const & accessors, expressions_tuple const & expressions)
|
||||
{
|
||||
kernel_generation_stream stream;
|
||||
process(stream, PARENT_NODE_TYPE, accessors, array_expressions, mappings);
|
||||
process(stream, PARENT_NODE_TYPE, accessors, expressions, mappings);
|
||||
std::string res = stream.str();
|
||||
// Drops everything from the final comma onward (i.e. the trailing separator).
// NOTE(review): if nothing containing ',' was emitted, rfind returns npos and
// std::string::erase(npos) throws std::out_of_range - confirm this cannot happen.
res.erase(res.rfind(','));
|
||||
return res;
|
||||
}
|
||||
|
||||
// Convenience overload: forwards to the accessor-taking generate_arguments with a fixed
// accessor table built through tools::make_map. The "array1" and "array2" entries splice
// `data_type` into the pointer declaration; "array0", "host_scalar" and "tuple4" use the
// literal `#scalartype` placeholder instead.
// NOTE(review): this is a rendered diff - the two "tuple4" lines and the two signatures
// appear to be the old and new spelling of the same line, not separate entries.
std::string base::generate_arguments(std::string const & data_type, std::vector<mapping_type> const & mappings, array_expressions_container const & array_expressions)
|
||||
std::string base::generate_arguments(std::string const & data_type, std::vector<mapping_type> const & mappings, expressions_tuple const & expressions)
|
||||
{
|
||||
return generate_arguments(mappings, tools::make_map<std::map<std::string, std::string> >("array0", "__global #scalartype* #pointer, uint #start,")
|
||||
("host_scalar", "#scalartype #name,")
|
||||
("array1", "__global " + data_type + "* #pointer, uint #start, uint #stride,")
|
||||
("array2", "__global " + data_type + "* #pointer, uint #ld, uint #start1, uint #start2, uint #stride1, uint #stride2,")
|
||||
("tuple4", "#scalartype #name0, #scalartype #name1, #scalartype #name2, #scalartype #name3,"), array_expressions);
|
||||
("tuple4", "#scalartype #name0, #scalartype #name1, #scalartype #name2, #scalartype #name3,"), expressions);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Traverses every expression from its root with a set_arguments_functor built from a
// fresh binder, `current_arg` and `kernel`. `current_arg` is taken by non-const reference;
// presumably the functor advances it as kernel arguments are set - verify in the functor.
// NOTE(review): this is a rendered diff - each adjacent `array_expressions*` / `expressions*`
// pair below appears to be the old and new spelling of the same line, not two statements.
void base::set_arguments(array_expressions_container const & array_expressions, cl::Kernel & kernel, unsigned int & current_arg)
|
||||
void base::set_arguments(expressions_tuple const & expressions, cl::Kernel & kernel, unsigned int & current_arg)
|
||||
{
|
||||
// One binder instance is shared by all expressions visited below.
tools::shared_ptr<symbolic_binder> binder = make_binder();
|
||||
for (array_expressions_container::data_type::const_iterator itt = array_expressions.data().begin(); itt != array_expressions.data().end(); ++itt)
|
||||
for (expressions_tuple::data_type::const_iterator itt = expressions.data().begin(); itt != expressions.data().end(); ++itt)
|
||||
traverse(**itt, (*itt)->root(), set_arguments_functor(*binder, current_arg, kernel), true);
|
||||
}
|
||||
|
||||
@@ -370,9 +370,9 @@ bool base::is_strided(array_expression::node const & node)
|
||||
|| node.op.type==OPERATOR_OUTER_PROD_TYPE;
|
||||
}
|
||||
|
||||
bool base::requires_fallback(array_expressions_container const & array_expressions)
|
||||
bool base::requires_fallback(expressions_tuple const & expressions)
|
||||
{
|
||||
for (array_expressions_container::data_type::const_iterator it = array_expressions.data().begin(); it != array_expressions.data().end(); ++it)
|
||||
for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it)
|
||||
for(array_expression::container_type::const_iterator itt = (*it)->tree().begin(); itt != (*it)->tree().end() ; ++itt)
|
||||
if( (itt->lhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->lhs.array.stride1, itt->lhs.array.stride2)>1 || std::max(itt->lhs.array.start1,itt->lhs.array.start2)>0))
|
||||
|| (itt->rhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->rhs.array.stride1, itt->rhs.array.stride2)>1 || std::max(itt->rhs.array.start1,itt->rhs.array.start2)>0)))
|
||||
@@ -490,34 +490,34 @@ tools::shared_ptr<symbolic_binder> base::make_binder()
|
||||
// Constructor: stores the requested binding policy in binding_policy_; no other work.
base::base(binding_policy_t binding_policy) : binding_policy_(binding_policy)
|
||||
{}
|
||||
|
||||
// Base implementation: ignores its argument and reports 0.
// NOTE(review): rendered diff - the two signatures are the old and new spelling of one line.
unsigned int base::lmem_usage(array_expressions_container const &) const
|
||||
unsigned int base::lmem_usage(expressions_tuple const &) const
|
||||
{ return 0; }
|
||||
|
||||
// Base implementation: ignores its argument and reports 0.
// NOTE(review): rendered diff - the two signatures are the old and new spelling of one line.
unsigned int base::registers_usage(array_expressions_container const &) const
|
||||
unsigned int base::registers_usage(expressions_tuple const &) const
|
||||
{ return 0; }
|
||||
|
||||
// Destructor with an empty body; defined out of line here.
base::~base()
|
||||
{ }
|
||||
|
||||
// Entry point for source generation: validates the template against `device`, builds one
// mapping per expression, and returns whatever generate_impl produces for them.
// Throws operation_not_supported_exception when check_invalid returns a non-zero code.
// NOTE(review): this is a rendered diff - each adjacent `array_expressions*` / `expressions*`
// pair below appears to be the old and new spelling of the same line, not two statements.
std::vector<std::string> base::generate(unsigned int label, array_expressions_container const & array_expressions, cl::Device const & device)
|
||||
std::vector<std::string> base::generate(unsigned int label, expressions_tuple const & expressions, cl::Device const & device)
|
||||
{
|
||||
array_expressions_container::data_type::const_iterator sit;
|
||||
expressions_tuple::data_type::const_iterator sit;
|
||||
std::vector<mapping_type>::iterator mit;
|
||||
|
||||
// A non-zero code means invalid; the numeric code is appended to the exception message.
if(int err = check_invalid(array_expressions, device))
|
||||
if(int err = check_invalid(expressions, device))
|
||||
throw operation_not_supported_exception("The supplied parameters for this template are invalid : err " + tools::to_string(err));
|
||||
|
||||
//Create mapping
|
||||
// Sized to the expression count, so the lockstep loop below stays in range for `mit`.
std::vector<mapping_type> mappings(array_expressions.data().size());
|
||||
std::vector<mapping_type> mappings(expressions.data().size());
|
||||
tools::shared_ptr<symbolic_binder> binder = make_binder();
|
||||
// Each expression is traversed from its root with a map_functor that receives the shared
// binder and the mapping slot for that expression.
for (mit = mappings.begin(), sit = array_expressions.data().begin(); sit != array_expressions.data().end(); ++sit, ++mit)
|
||||
for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++sit, ++mit)
|
||||
traverse(**sit, (*sit)->root(), map_functor(*binder,*mit), true);
|
||||
|
||||
return generate_impl(label, array_expressions, mappings);
|
||||
return generate_impl(label, expressions, mappings);
|
||||
}
|
||||
|
||||
// Default template-specific validity check: ignores both arguments and returns TEMPLATE_VALID.
// NOTE(review): rendered diff - the two signatures are the old and new spelling of one line.
template<class TType, class PType>
|
||||
int base_impl<TType, PType>::check_invalid_impl(cl::Device const &, array_expressions_container const &) const
|
||||
int base_impl<TType, PType>::check_invalid_impl(cl::Device const &, expressions_tuple const &) const
|
||||
{ return TEMPLATE_VALID; }
|
||||
|
||||
template<class TType, class PType>
|
||||
@@ -537,11 +537,11 @@ tools::shared_ptr<base> base_impl<TType, PType>::clone() const
|
||||
{ return tools::shared_ptr<base>(new TType(*dynamic_cast<TType const *>(this))); }
|
||||
|
||||
template<class TType, class PType>
|
||||
int base_impl<TType, PType>::check_invalid(array_expressions_container const & array_expressions, cl::Device const & device) const
|
||||
int base_impl<TType, PType>::check_invalid(expressions_tuple const & expressions, cl::Device const & device) const
|
||||
{
|
||||
//Query device informations
|
||||
size_t lmem_available = device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
|
||||
size_t lmem_used = lmem_usage(array_expressions);
|
||||
size_t lmem_used = lmem_usage(expressions);
|
||||
if (lmem_used>lmem_available)
|
||||
return TEMPLATE_LOCAL_MEMORY_OVERFLOW;
|
||||
|
||||
@@ -575,7 +575,7 @@ int base_impl<TType, PType>::check_invalid(array_expressions_container const & a
|
||||
p_.simd_width!=16)
|
||||
return TEMPLATE_INVALID_SIMD_WIDTH;
|
||||
|
||||
return check_invalid_impl(device, array_expressions);
|
||||
return check_invalid_impl(device, expressions);
|
||||
}
|
||||
|
||||
// Explicit instantiation of base_impl for the vaxpy template and its parameter type.
template class base_impl<vaxpy, vaxpy_parameters>;
|
||||
|
@@ -14,7 +14,7 @@ maxpy_parameters::maxpy_parameters(unsigned int _simd_width,
|
||||
|
||||
|
||||
|
||||
int maxpy::check_invalid_impl(cl::Device const &, array_expressions_container const &) const
|
||||
int maxpy::check_invalid_impl(cl::Device const &, expressions_tuple const &) const
|
||||
{
|
||||
if (p_.simd_width>1)
|
||||
return TEMPLATE_INVALID_SIMD_WIDTH;
|
||||
@@ -23,7 +23,7 @@ int maxpy::check_invalid_impl(cl::Device const &, array_expressions_container co
|
||||
return TEMPLATE_VALID;
|
||||
}
|
||||
|
||||
std::string maxpy::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const
|
||||
std::string maxpy::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const
|
||||
{
|
||||
kernel_generation_stream stream;
|
||||
|
||||
@@ -33,13 +33,13 @@ std::string maxpy::generate_impl(unsigned int label, array_expressions_container
|
||||
fill_kernel_name(kprefix, label, "d");
|
||||
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl;
|
||||
stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, array_expressions) << ")" << std::endl;
|
||||
stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, expressions) << ")" << std::endl;
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
process(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array0", "#scalartype #namereg = #pointer[#start];")
|
||||
("array1", "#pointer += #start;")
|
||||
("array2", "#pointer = &$VALUE{#start1, #start2};"), array_expressions, mappings);
|
||||
("array2", "#pointer = &$VALUE{#start1, #start2};"), expressions, mappings);
|
||||
|
||||
fetching_loop_info(p_.fetching_policy, "M", stream, init0, upper_bound0, inc0, "get_global_id(0)", "get_global_size(0)");
|
||||
stream << "for(unsigned int i = " << init0 << "; i < " << upper_bound0 << "; i += " << inc0 << ")" << std::endl;
|
||||
@@ -55,7 +55,7 @@ std::string maxpy::generate_impl(unsigned int label, array_expressions_container
|
||||
("vdiag", "#scalartype #namereg = ((i + ((#diag_offset<0)?#diag_offset:0))!=(j-((#diag_offset>0)?#diag_offset:0)))?0:$VALUE{min(i*#stride1, j*#stride1)};")
|
||||
("repeat", "#scalartype #namereg = $VALUE{(i%#tuplearg0)*#stride1, (j%#tuplearg1)*#stride2};")
|
||||
("outer", "#scalartype #namereg = ($LVALUE{i*#stride})*($RVALUE{j*#stride});")
|
||||
, array_expressions, mappings);
|
||||
, expressions, mappings);
|
||||
|
||||
evaluate(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >
|
||||
("array2", "#namereg")
|
||||
@@ -64,10 +64,10 @@ std::string maxpy::generate_impl(unsigned int label, array_expressions_container
|
||||
("array0", "#namereg")
|
||||
("outer", "#namereg")
|
||||
("cast", "convert_"+data_type)
|
||||
, array_expressions, mappings);
|
||||
, expressions, mappings);
|
||||
|
||||
process(stream, LHS_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array2", "$VALUE{i*#stride1,j*#stride2} = #namereg;")
|
||||
, array_expressions, mappings);
|
||||
, expressions, mappings);
|
||||
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
@@ -81,10 +81,10 @@ std::string maxpy::generate_impl(unsigned int label, array_expressions_container
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
// Returns a one-element vector holding the source produced by the string-returning
// generate_impl overload, called with 1 as its last (simd_width) argument.
// NOTE(review): this is a rendered diff - each adjacent `array_expressions` / `expressions`
// pair below appears to be the old and new spelling of the same line, not two statements.
std::vector<std::string> maxpy::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const
|
||||
std::vector<std::string> maxpy::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const
|
||||
{
|
||||
std::vector<std::string> res;
|
||||
res.push_back(generate_impl(label, array_expressions, mappings, 1));
|
||||
res.push_back(generate_impl(label, expressions, mappings, 1));
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -97,9 +97,9 @@ maxpy::maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2,
|
||||
base_impl<maxpy, maxpy_parameters>(maxpy_parameters(simd, ls1, ls2, ng1, ng2, fetch), bind)
|
||||
{}
|
||||
|
||||
// Returns a two-element vector {size.first, size.second}, where `size` is matrix_size(...)
// of the lhs_most node of the first expression's tree, starting from its root.
// Only the first expression is inspected; assumes the container is non-empty - TODO confirm.
// NOTE(review): this is a rendered diff - each adjacent `array_expressions` / `expressions`
// pair below appears to be the old and new spelling of the same line, not two statements.
std::vector<int_t> maxpy::input_sizes(array_expressions_container const & array_expressions)
|
||||
std::vector<int_t> maxpy::input_sizes(expressions_tuple const & expressions)
|
||||
{
|
||||
// The local reference shares its name with the (namespace-qualified) type.
atidlas::array_expression const & array_expression = *(array_expressions.data().front());
|
||||
atidlas::array_expression const & array_expression = *(expressions.data().front());
|
||||
std::pair<int_t, int_t> size = matrix_size(lhs_most(array_expression.tree(), array_expression.root()));
|
||||
return tools::make_vector<int_t>() << size.first << size.second;
|
||||
}
|
||||
@@ -107,7 +107,7 @@ std::vector<int_t> maxpy::input_sizes(array_expressions_container const & array_
|
||||
void maxpy::enqueue(cl::CommandQueue & queue,
|
||||
std::vector<cl_ext::lazy_compiler> & programs,
|
||||
unsigned int label,
|
||||
array_expressions_container const & array_expressions)
|
||||
expressions_tuple const & expressions)
|
||||
{
|
||||
char kname[10];
|
||||
fill_kernel_name(kname, label, "d");
|
||||
@@ -116,10 +116,10 @@ void maxpy::enqueue(cl::CommandQueue & queue,
|
||||
cl::NDRange grange(p_.local_size_0*p_.num_groups_0, p_.local_size_1*p_.num_groups_1);
|
||||
cl::NDRange lrange(p_.local_size_0, p_.local_size_1);
|
||||
unsigned int current_arg = 0;
|
||||
std::vector<int_t> MN = input_sizes(array_expressions);
|
||||
std::vector<int_t> MN = input_sizes(expressions);
|
||||
kernel.setArg(current_arg++, cl_uint(MN[0]));
|
||||
kernel.setArg(current_arg++, cl_uint(MN[1]));
|
||||
set_arguments(array_expressions, kernel, current_arg);
|
||||
set_arguments(expressions, kernel, current_arg);
|
||||
queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange);
|
||||
}
|
||||
|
||||
|
@@ -17,9 +17,9 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
||||
mL(ms*local_size_0), nL(ns*local_size_1){}
|
||||
|
||||
|
||||
unsigned int mproduct::lmem_usage(array_expressions_container const & array_expressions) const
|
||||
unsigned int mproduct::lmem_usage(expressions_tuple const & expressions) const
|
||||
{
|
||||
atidlas::array_expression const & array_expression = (*array_expressions.data().front());
|
||||
atidlas::array_expression const & array_expression = (*expressions.data().front());
|
||||
numeric_type numeric_t = lhs_most(array_expression.tree(), array_expression.root()).lhs.dtype;
|
||||
|
||||
unsigned int N = 0;
|
||||
@@ -30,16 +30,16 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
||||
return N*size_of(numeric_t);
|
||||
}
|
||||
|
||||
// Returns (mS*nS + mS*kS + kS*nS) * size_of(dtype), where dtype is read from the lhs of the
// lhs_most node of the first expression. Only the first expression is inspected; assumes
// the container is non-empty - TODO confirm.
// NOTE(review): this is a rendered diff - each adjacent `array_expressions` / `expressions`
// pair below appears to be the old and new spelling of the same line, not two statements.
unsigned int mproduct::registers_usage(array_expressions_container const & array_expressions) const
|
||||
unsigned int mproduct::registers_usage(expressions_tuple const & expressions) const
|
||||
{
|
||||
atidlas::array_expression const & array_expression = (*array_expressions.data().front());
|
||||
atidlas::array_expression const & array_expression = (*expressions.data().front());
|
||||
numeric_type numeric_t = lhs_most(array_expression.tree(), array_expression.root()).lhs.dtype;
|
||||
|
||||
// Element count derived from the three pairwise products of the mS / nS / kS parameters.
unsigned int N = p_.mS * p_.nS + p_.mS * p_.kS + p_.kS * p_.nS;
|
||||
return N*size_of(numeric_t);
|
||||
}
|
||||
|
||||
int mproduct::check_invalid_impl(cl::Device const &, array_expressions_container const &) const
|
||||
int mproduct::check_invalid_impl(cl::Device const &, expressions_tuple const &) const
|
||||
{
|
||||
if (p_.A_fetching_policy!=FETCH_FROM_LOCAL && p_.B_fetching_policy!=FETCH_FROM_LOCAL&& (p_.local_fetch_0!=0 || p_.local_fetch_1!=0))
|
||||
return TEMPLATE_GLOBAL_MEMORY_REQUIRES_ZERO_LOCAL_FETCH;
|
||||
@@ -87,7 +87,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
||||
return TEMPLATE_VALID;
|
||||
}
|
||||
|
||||
std::string mproduct::generate_impl(unsigned int label, const char * id, const array_expressions_container &array_expressions, const std::vector<mapping_type> &, bool fallback) const
|
||||
std::string mproduct::generate_impl(unsigned int label, const char * id, const expressions_tuple &expressions, const std::vector<mapping_type> &, bool fallback) const
|
||||
{
|
||||
using std::string;
|
||||
using tools::to_string;
|
||||
@@ -106,7 +106,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
||||
/// INIT
|
||||
/// //////////////
|
||||
kernel_generation_stream stream;
|
||||
array_expression const & st = (*array_expressions.data().front());
|
||||
array_expression const & st = (*expressions.data().front());
|
||||
numeric_type dtype = lhs_most(st.tree(), st.root()).lhs.dtype;
|
||||
std::string dtypestr = numeric_type_to_string(dtype);
|
||||
|
||||
@@ -557,11 +557,11 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
||||
#undef VST0RE
|
||||
}
|
||||
|
||||
// Returns two generated sources, in this order: id "o" with fallback=false, then id "f"
// with fallback=true.
// NOTE(review): this is a rendered diff - the first two push_back lines are the old
// spelling and the last two the new spelling of the same pair, so the real function
// pushes two entries, not four.
std::vector<std::string> mproduct::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const
|
||||
std::vector<std::string> mproduct::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const
|
||||
{
|
||||
std::vector<std::string> res;
|
||||
res.push_back(generate_impl(label, "o", array_expressions, mappings, false));
|
||||
res.push_back(generate_impl(label, "f", array_expressions, mappings, true));
|
||||
res.push_back(generate_impl(label, "o", expressions, mappings, false));
|
||||
res.push_back(generate_impl(label, "f", expressions, mappings, true));
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -615,10 +615,10 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<int_t> mproduct::infos(array_expressions_container const & array_expressions,
|
||||
std::vector<int_t> mproduct::infos(expressions_tuple const & expressions,
|
||||
lhs_rhs_element & C, lhs_rhs_element & A, lhs_rhs_element & B)
|
||||
{
|
||||
atidlas::array_expression const & array_expression = (*array_expressions.data().front());
|
||||
atidlas::array_expression const & array_expression = (*expressions.data().front());
|
||||
array_expression::container_type const & array = array_expression.tree();
|
||||
std::size_t root = array_expression.root();
|
||||
|
||||
@@ -640,18 +640,18 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
||||
// Constructor: forwards `parameters` to base_impl with the BIND_ALL_UNIQUE binding policy
// and stores the two transposition flags in A_trans_ and B_trans_.
mproduct::mproduct(mproduct_parameters const & parameters, char A_trans, char B_trans) : base_impl<mproduct, mproduct_parameters>(parameters, BIND_ALL_UNIQUE), A_trans_(A_trans), B_trans_(B_trans)
|
||||
{ }
|
||||
|
||||
// Returns the vector produced by infos(...); the three lhs_rhs_element outputs that infos
// fills in are discarded.
// NOTE(review): this is a rendered diff - each adjacent `array_expressions` / `expressions`
// pair below appears to be the old and new spelling of the same line, not two statements.
std::vector<int_t> mproduct::input_sizes(array_expressions_container const & array_expressions)
|
||||
std::vector<int_t> mproduct::input_sizes(expressions_tuple const & expressions)
|
||||
{
|
||||
// Placeholders required by the infos signature; never read afterwards.
lhs_rhs_element d0, d1, d2;
|
||||
return infos(array_expressions, d0, d1, d2);
|
||||
return infos(expressions, d0, d1, d2);
|
||||
}
|
||||
|
||||
void mproduct::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, array_expressions_container const & array_expressions)
|
||||
void mproduct::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, expressions_tuple const & expressions)
|
||||
{
|
||||
using namespace tools;
|
||||
|
||||
lhs_rhs_element C, A, B;
|
||||
std::vector<int_t> MNK = infos(array_expressions, C, A, B);
|
||||
std::vector<int_t> MNK = infos(expressions, C, A, B);
|
||||
|
||||
int_t M = MNK[0];
|
||||
int_t N = MNK[1];
|
||||
|
@@ -14,7 +14,7 @@ mreduction_parameters::mreduction_parameters(unsigned int _simd_width,
|
||||
num_groups_0(_num_groups_0), fetch_policy(_fetch_policy) { }
|
||||
|
||||
|
||||
int mreduction::check_invalid_impl(cl::Device const &, array_expressions_container const &) const
|
||||
int mreduction::check_invalid_impl(cl::Device const &, expressions_tuple const &) const
|
||||
{
|
||||
if (p_.fetch_policy==FETCH_FROM_LOCAL)
|
||||
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
|
||||
@@ -26,7 +26,7 @@ unsigned int mreduction::lmem_usage() const
|
||||
return p_.local_size_0*(p_.local_size_1+1);
|
||||
}
|
||||
|
||||
std::string mreduction::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width, std::vector<mapped_mreduction*> const & exprs) const
|
||||
std::string mreduction::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width, std::vector<mapped_mreduction*> const & exprs) const
|
||||
{
|
||||
using tools::to_string;
|
||||
|
||||
@@ -40,7 +40,7 @@ std::string mreduction::generate_impl(unsigned int label, array_expressions_cont
|
||||
fill_kernel_name(kprefix, label, "d");
|
||||
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl;
|
||||
stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, array_expressions) << ")" << std::endl;
|
||||
stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, expressions) << ")" << std::endl;
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
@@ -48,7 +48,7 @@ std::string mreduction::generate_impl(unsigned int label, array_expressions_cont
|
||||
tools::make_map<std::map<std::string, std::string> >("array0", "#scalartype #namereg = #pointer[#start];")
|
||||
("array1", "#pointer += #start;")
|
||||
("array2", "#pointer += #start1 + #start2*#ld; "
|
||||
"#ld *= #nldstride; "), array_expressions, mappings);
|
||||
"#ld *= #nldstride; "), expressions, mappings);
|
||||
|
||||
for (std::vector<mapped_mreduction*>::const_iterator it = exprs.begin(); it != exprs.end(); ++it)
|
||||
stream << (*it)->process("__local #scalartype #name_buf[" + to_string(lsize0*lsize1) + "];") << std::endl;
|
||||
@@ -160,7 +160,7 @@ std::string mreduction::generate_impl(unsigned int label, array_expressions_cont
|
||||
std::map<std::string, std::string> accessors;
|
||||
accessors["mreduction"] = "#name_buf[lid0*" + lsize1str + "]";
|
||||
accessors["array1"] = "#pointer[r*#stride]";
|
||||
evaluate(stream, PARENT_NODE_TYPE, accessors, array_expressions, mappings);
|
||||
evaluate(stream, PARENT_NODE_TYPE, accessors, expressions, mappings);
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
|
||||
@@ -174,14 +174,14 @@ std::string mreduction::generate_impl(unsigned int label, array_expressions_cont
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
std::vector<std::string> mreduction::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const
|
||||
std::vector<std::string> mreduction::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const
|
||||
{
|
||||
std::vector<mapped_mreduction*> exprs;
|
||||
array_expressions_container::data_type::const_iterator sit;
|
||||
expressions_tuple::data_type::const_iterator sit;
|
||||
std::vector<mapping_type>::const_iterator mit;
|
||||
for (mit = mappings.begin(), sit = array_expressions.data().begin(); mit != mappings.end(); ++mit, ++sit)
|
||||
for (mit = mappings.begin(), sit = expressions.data().begin(); mit != mappings.end(); ++mit, ++sit)
|
||||
{
|
||||
array_expression const & first_expression = *array_expressions.data().front();
|
||||
array_expression const & first_expression = *expressions.data().front();
|
||||
std::vector<size_t> idx = filter_nodes(&is_reduction, first_expression, false);
|
||||
for (unsigned int j = 0; j < idx.size(); ++j)
|
||||
exprs.push_back((mapped_mreduction*)(mit->at(mapping_key(idx[j], PARENT_NODE_TYPE)).get()));
|
||||
@@ -190,11 +190,11 @@ std::vector<std::string> mreduction::generate_impl(unsigned int label, array_exp
|
||||
std::vector<std::string> res;
|
||||
if (reduction_type_ && p_.simd_width>1)
|
||||
{
|
||||
res.push_back(generate_impl(label, array_expressions, mappings, p_.simd_width, exprs));
|
||||
res.push_back(generate_impl(label, array_expressions, mappings, 1, exprs));
|
||||
res.push_back(generate_impl(label, expressions, mappings, p_.simd_width, exprs));
|
||||
res.push_back(generate_impl(label, expressions, mappings, 1, exprs));
|
||||
}
|
||||
else
|
||||
res.push_back(generate_impl(label, array_expressions, mappings, 1, exprs));
|
||||
res.push_back(generate_impl(label, expressions, mappings, 1, exprs));
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -204,9 +204,9 @@ mreduction::mreduction(mreduction::parameters_type const & parameters,
|
||||
base_impl<mreduction, mreduction_parameters>(parameters, binding_policy),
|
||||
reduction_type_(rtype){ }
|
||||
|
||||
std::vector<int_t> mreduction::input_sizes(array_expressions_container const & array_expressions)
|
||||
std::vector<int_t> mreduction::input_sizes(expressions_tuple const & expressions)
|
||||
{
|
||||
array_expression const & first_expression = *array_expressions.data().front();
|
||||
array_expression const & first_expression = *expressions.data().front();
|
||||
std::vector<std::size_t> idx = filter_nodes(&is_reduction, first_expression, false);
|
||||
std::pair<int_t, int_t> MN = matrix_size(lhs_most(first_expression.tree(), idx[0]));
|
||||
if(reduction_type_==REDUCE_COLUMNS)
|
||||
@@ -217,15 +217,15 @@ std::vector<int_t> mreduction::input_sizes(array_expressions_container const & a
|
||||
void mreduction::enqueue(cl::CommandQueue & queue,
|
||||
std::vector<cl_ext::lazy_compiler> & programs,
|
||||
unsigned int label,
|
||||
array_expressions_container const & array_expressions)
|
||||
expressions_tuple const & expressions)
|
||||
{
|
||||
char kname[10];
|
||||
fill_kernel_name(kname, label, "d");
|
||||
std::vector<int_t> MN = input_sizes(array_expressions);
|
||||
std::vector<int_t> MN = input_sizes(expressions);
|
||||
|
||||
//Kernel
|
||||
int idx = 0;
|
||||
if(reduction_type_==REDUCE_COLUMNS && p_.simd_width>1 && requires_fallback(array_expressions))
|
||||
if(reduction_type_==REDUCE_COLUMNS && p_.simd_width>1 && requires_fallback(expressions))
|
||||
idx = 1;
|
||||
cl::Program & program = programs[idx].program();
|
||||
cl::Kernel kernel(program, kname);
|
||||
@@ -237,7 +237,7 @@ void mreduction::enqueue(cl::CommandQueue & queue,
|
||||
unsigned int current_arg = 0;
|
||||
kernel.setArg(current_arg++, cl_uint(MN[0]));
|
||||
kernel.setArg(current_arg++, cl_uint(MN[1]));
|
||||
set_arguments(array_expressions, kernel, current_arg);
|
||||
set_arguments(expressions, kernel, current_arg);
|
||||
|
||||
queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange);
|
||||
}
|
||||
|
@@ -13,10 +13,10 @@ reduction_parameters::reduction_parameters(unsigned int _simd_width,
|
||||
fetching_policy_type _fetching_policy) : base::parameters_type(_simd_width, _group_size, 1, 2), num_groups(_num_groups), fetching_policy(_fetching_policy)
|
||||
{ }
|
||||
|
||||
unsigned int reduction::lmem_usage(array_expressions_container const & array_expressions) const
|
||||
unsigned int reduction::lmem_usage(expressions_tuple const & expressions) const
|
||||
{
|
||||
unsigned int res = 0;
|
||||
for(array_expressions_container::data_type::const_iterator it = array_expressions.data().begin() ; it != array_expressions.data().end() ; ++it)
|
||||
for(expressions_tuple::data_type::const_iterator it = expressions.data().begin() ; it != expressions.data().end() ; ++it)
|
||||
{
|
||||
numeric_type numeric_t= lhs_most((*it)->tree(), (*it)->root()).lhs.dtype;
|
||||
res += p_.local_size_0*size_of(numeric_t);
|
||||
@@ -24,7 +24,7 @@ unsigned int reduction::lmem_usage(array_expressions_container const & array_exp
|
||||
return res;
|
||||
}
|
||||
|
||||
int reduction::check_invalid_impl(cl::Device const &, array_expressions_container const &) const
|
||||
int reduction::check_invalid_impl(cl::Device const &, expressions_tuple const &) const
|
||||
{
|
||||
if (p_.fetching_policy==FETCH_FROM_LOCAL)
|
||||
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
|
||||
@@ -56,7 +56,7 @@ inline void reduction::reduce_1d_local_memory(kernel_generation_stream & stream,
|
||||
stream << "}" << std::endl;
|
||||
}
|
||||
|
||||
std::string reduction::generate_impl(unsigned int label, const char * type, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const
|
||||
std::string reduction::generate_impl(unsigned int label, const char * type, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const
|
||||
{
|
||||
kernel_generation_stream stream;
|
||||
|
||||
@@ -89,13 +89,13 @@ std::string reduction::generate_impl(unsigned int label, const char * type, arra
|
||||
fill_kernel_name(kprefix, label, type);
|
||||
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl;
|
||||
stream << "__kernel void " << kprefix << "0" << "(" << arguments << generate_arguments("#scalartype", mappings, array_expressions) << ")" << std::endl;
|
||||
stream << "__kernel void " << kprefix << "0" << "(" << arguments << generate_arguments("#scalartype", mappings, expressions) << ")" << std::endl;
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
stream << "unsigned int lid = get_local_id(0);" << std::endl;
|
||||
process(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array0", "#scalartype #namereg = #pointer[#start];")
|
||||
("array1", "#pointer += #start;"), array_expressions, mappings);
|
||||
("array1", "#pointer += #start;"), expressions, mappings);
|
||||
|
||||
for (unsigned int k = 0; k < N; ++k)
|
||||
{
|
||||
@@ -194,7 +194,7 @@ std::string reduction::generate_impl(unsigned int label, const char * type, arra
|
||||
* Second kernel
|
||||
* -----------------------*/
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl;
|
||||
stream << "__kernel void " << kprefix << "1" << "(" << arguments << generate_arguments("#scalartype", mappings, array_expressions) << ")" << std::endl;
|
||||
stream << "__kernel void " << kprefix << "1" << "(" << arguments << generate_arguments("#scalartype", mappings, expressions) << ")" << std::endl;
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
@@ -246,7 +246,7 @@ std::string reduction::generate_impl(unsigned int label, const char * type, arra
|
||||
std::map<std::string, std::string> accessors;
|
||||
accessors["scalar_reduction"] = "#name_buf[0]";
|
||||
accessors["array0"] = "#pointer[#start]";
|
||||
evaluate(stream, PARENT_NODE_TYPE, accessors, array_expressions, mappings);
|
||||
evaluate(stream, PARENT_NODE_TYPE, accessors, expressions, mappings);
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
|
||||
@@ -256,11 +256,11 @@ std::string reduction::generate_impl(unsigned int label, const char * type, arra
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
std::vector<std::string> reduction::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const
|
||||
std::vector<std::string> reduction::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
result.push_back(generate_impl(label, "f", array_expressions, mappings, 1));
|
||||
result.push_back(generate_impl(label, "o", array_expressions, mappings, p_.simd_width));
|
||||
result.push_back(generate_impl(label, "f", expressions, mappings, 1));
|
||||
result.push_back(generate_impl(label, "o", expressions, mappings, p_.simd_width));
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -273,22 +273,22 @@ reduction::reduction(unsigned int simd, unsigned int ls, unsigned int ng,
|
||||
base_impl<reduction, reduction_parameters>(reduction_parameters(simd,ls,ng,fetch), bind)
|
||||
{}
|
||||
|
||||
std::vector<int_t> reduction::input_sizes(array_expressions_container const & array_expressions)
|
||||
std::vector<int_t> reduction::input_sizes(expressions_tuple const & expressions)
|
||||
{
|
||||
std::vector<size_t> reductions_idx = filter_nodes(&is_reduction, *(array_expressions.data().front()), false);
|
||||
int_t N = vector_size(lhs_most(array_expressions.data().front()->tree(), reductions_idx[0]));
|
||||
std::vector<size_t> reductions_idx = filter_nodes(&is_reduction, *(expressions.data().front()), false);
|
||||
int_t N = vector_size(lhs_most(expressions.data().front()->tree(), reductions_idx[0]));
|
||||
return tools::make_vector<int_t>() << N;
|
||||
}
|
||||
|
||||
void reduction::enqueue(cl::CommandQueue & queue,
|
||||
std::vector<cl_ext::lazy_compiler> & programs,
|
||||
unsigned int label,
|
||||
array_expressions_container const & array_expressions)
|
||||
expressions_tuple const & expressions)
|
||||
{
|
||||
//Preprocessing
|
||||
int_t size = input_sizes(array_expressions)[0];
|
||||
int_t size = input_sizes(expressions)[0];
|
||||
std::vector<array_expression::node const *> reductions;
|
||||
for (array_expressions_container::data_type::const_iterator it = array_expressions.data().begin(); it != array_expressions.data().end(); ++it)
|
||||
for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it)
|
||||
{
|
||||
std::vector<size_t> reductions_idx = filter_nodes(&is_reduction, **it, false);
|
||||
for (std::vector<size_t>::iterator itt = reductions_idx.begin(); itt != reductions_idx.end(); ++itt)
|
||||
@@ -303,7 +303,7 @@ void reduction::enqueue(cl::CommandQueue & queue,
|
||||
fill_kernel_name(kopt[0], label, "o0");
|
||||
fill_kernel_name(kopt[1], label, "o1");
|
||||
|
||||
bool fallback = p_.simd_width > 1 && (requires_fallback(array_expressions) || (size%p_.simd_width>0));
|
||||
bool fallback = p_.simd_width > 1 && (requires_fallback(expressions) || (size%p_.simd_width>0));
|
||||
cl::Program & program = programs[fallback?0:1].program();
|
||||
cl::Kernel kernels[2] = { cl::Kernel(program, fallback?kfallback[0]:kopt[0]),
|
||||
cl::Kernel(program, fallback?kfallback[1]:kopt[1]) };
|
||||
@@ -313,8 +313,8 @@ void reduction::enqueue(cl::CommandQueue & queue,
|
||||
cl::NDRange lrange[2] = { cl::NDRange(p_.local_size_0), cl::NDRange(p_.local_size_0) };
|
||||
|
||||
//Arguments
|
||||
cl::Context context = array_expressions.context();
|
||||
array_expression const & s = *(array_expressions.data().front());
|
||||
cl::Context context = expressions.context();
|
||||
array_expression const & s = *(expressions.data().front());
|
||||
unsigned int dtype_size = size_of(lhs_most(s.tree(), s.root()).lhs.dtype);
|
||||
for (unsigned int k = 0; k < 2; k++)
|
||||
{
|
||||
@@ -338,7 +338,7 @@ void reduction::enqueue(cl::CommandQueue & queue,
|
||||
kernels[k].setArg(n_arg++, tmp_[i]);
|
||||
i++;
|
||||
}
|
||||
set_arguments(array_expressions, kernels[k], n_arg);
|
||||
set_arguments(expressions, kernels[k], n_arg);
|
||||
}
|
||||
|
||||
for (unsigned int k = 0; k < 2; k++)
|
||||
|
@@ -16,14 +16,14 @@ vaxpy_parameters::vaxpy_parameters(unsigned int _simd_width,
|
||||
{ }
|
||||
|
||||
|
||||
int vaxpy::check_invalid_impl(cl::Device const &, array_expressions_container const &) const
|
||||
int vaxpy::check_invalid_impl(cl::Device const &, expressions_tuple const &) const
|
||||
{
|
||||
if (p_.fetching_policy==FETCH_FROM_LOCAL)
|
||||
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
|
||||
return TEMPLATE_VALID;
|
||||
}
|
||||
|
||||
std::vector<std::string> vaxpy::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const
|
||||
std::vector<std::string> vaxpy::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
for (unsigned int i = 0; i < 2; ++i)
|
||||
@@ -36,14 +36,14 @@ std::vector<std::string> vaxpy::generate_impl(unsigned int label, array_expressi
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl;
|
||||
char kprefix[10];
|
||||
fill_kernel_name(kprefix, label, (i==0?"f":"o"));
|
||||
stream << "__kernel void " << kprefix << "(unsigned int N," << generate_arguments(data_type, mappings, array_expressions) << ")" << std::endl;
|
||||
stream << "__kernel void " << kprefix << "(unsigned int N," << generate_arguments(data_type, mappings, expressions) << ")" << std::endl;
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
process(stream, PARENT_NODE_TYPE,
|
||||
tools::make_map<std::map<std::string, std::string> >("array0", "#scalartype #namereg = #pointer[#start];")
|
||||
("array1", "#pointer += #start;")
|
||||
("array1", "#start1/=" + str_simd_width + ";"), array_expressions, mappings);
|
||||
("array1", "#start1/=" + str_simd_width + ";"), expressions, mappings);
|
||||
|
||||
std::string init, upper_bound, inc;
|
||||
fetching_loop_info(p_.fetching_policy, "N/"+str_simd_width, stream, init, upper_bound, inc, "get_global_id(0)", "get_global_size(0)");
|
||||
@@ -55,7 +55,7 @@ std::vector<std::string> vaxpy::generate_impl(unsigned int label, array_expressi
|
||||
("matrix_row", "#scalartype #namereg = $VALUE{#row*#stride1, i*#stride2};")
|
||||
("matrix_column", "#scalartype #namereg = $VALUE{i*#stride1,#column*#stride2};")
|
||||
("matrix_diag", "#scalartype #namereg = #pointer[#diag_offset<0?$OFFSET{(i - #diag_offset)*#stride1, i*#stride2}:$OFFSET{i*#stride1, (i + #diag_offset)*#stride2}];")
|
||||
, array_expressions, mappings);
|
||||
, expressions, mappings);
|
||||
|
||||
evaluate(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array1", "#namereg")
|
||||
("matrix_row", "#namereg")
|
||||
@@ -63,13 +63,13 @@ std::vector<std::string> vaxpy::generate_impl(unsigned int label, array_expressi
|
||||
("matrix_diag", "#namereg")
|
||||
("array0", "#namereg")
|
||||
("cast", "convert_"+data_type)
|
||||
, array_expressions, mappings);
|
||||
, expressions, mappings);
|
||||
|
||||
process(stream, LHS_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array1", "#pointer[i*#stride] = #namereg;")
|
||||
("matrix_row", "$VALUE{#row, i} = #namereg;")
|
||||
("matrix_column", "$VALUE{i, #column} = #namereg;")
|
||||
("matrix_diag", "#diag_offset<0?$VALUE{(i - #diag_offset)*#stride1, i*#stride2}:$VALUE{i*#stride1, (i + #diag_offset)*#stride2} = #namereg;")
|
||||
,array_expressions, mappings);
|
||||
,expressions, mappings);
|
||||
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
@@ -77,7 +77,7 @@ std::vector<std::string> vaxpy::generate_impl(unsigned int label, array_expressi
|
||||
stream << "if(get_global_id(0)==0)" << std::endl;
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
process(stream, LHS_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array0", "#pointer[#start] = #namereg;"), array_expressions, mappings);
|
||||
process(stream, LHS_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array0", "#pointer[#start] = #namereg;"), expressions, mappings);
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
|
||||
@@ -102,25 +102,25 @@ vaxpy::vaxpy(unsigned int simd, unsigned int ls, unsigned int ng,
|
||||
{}
|
||||
|
||||
|
||||
std::vector<int_t> vaxpy::input_sizes(array_expressions_container const & array_expressions)
|
||||
std::vector<int_t> vaxpy::input_sizes(expressions_tuple const & expressions)
|
||||
{
|
||||
int_t size = static_cast<array_expression const *>(array_expressions.data().front().get())->shape()._1;
|
||||
int_t size = static_cast<array_expression const *>(expressions.data().front().get())->shape()._1;
|
||||
return tools::make_vector<int_t>() << size;
|
||||
}
|
||||
|
||||
void vaxpy::enqueue(cl::CommandQueue & queue,
|
||||
std::vector<cl_ext::lazy_compiler> & programs,
|
||||
unsigned int label,
|
||||
array_expressions_container const & array_expressions)
|
||||
expressions_tuple const & expressions)
|
||||
{
|
||||
//Size
|
||||
int_t size = input_sizes(array_expressions)[0];
|
||||
int_t size = input_sizes(expressions)[0];
|
||||
//Kernel
|
||||
char kfb[10];
|
||||
char kopt[10];
|
||||
fill_kernel_name(kfb, label, "f");
|
||||
fill_kernel_name(kopt, label, "o");
|
||||
bool fallback = p_.simd_width > 1 && (requires_fallback(array_expressions) || (size%p_.simd_width>0));
|
||||
bool fallback = p_.simd_width > 1 && (requires_fallback(expressions) || (size%p_.simd_width>0));
|
||||
|
||||
cl::Program const & program = programs[fallback?0:1].program();
|
||||
cl_ext::kernels_t::key_type key(program(), label);
|
||||
@@ -135,7 +135,7 @@ void vaxpy::enqueue(cl::CommandQueue & queue,
|
||||
//Arguments
|
||||
unsigned int current_arg = 0;
|
||||
kernel.setArg(current_arg++, cl_uint(size));
|
||||
set_arguments(array_expressions, kernel, current_arg);
|
||||
set_arguments(expressions, kernel, current_arg);
|
||||
queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange);
|
||||
queue.flush();
|
||||
}
|
||||
|
Reference in New Issue
Block a user