Removing ViennaCL dependency

This commit is contained in:
Philippe Tillet
2014-11-09 16:29:55 -05:00
parent 8bd9aad34c
commit a6de4c96be
22 changed files with 1627 additions and 943 deletions

View File

@@ -7,13 +7,7 @@
#include <stdexcept>
#include "atidlas/tools/shared_ptr.hpp"
#include "viennacl/scheduler/io.hpp"
#include "viennacl/ocl/forwards.h"
#include "viennacl/scheduler/forwards.h"
#include "viennacl/backend/mem_handle.hpp"
#include "viennacl/device_specific/forwards.h"
#include "atidlas/scheduler/forwards.h"
namespace atidlas
{
@@ -50,22 +44,22 @@ struct atidlas_int_tuple
std::string bound1;
};
/** @brief Tells whether a statement node represents a scalar reduction.
*
* @param node  statement node whose operator (node.op) is inspected
* @return true when the operator type is OPERATION_BINARY_INNER_PROD_TYPE, or when the
*         operator's type family is OPERATION_VECTOR_REDUCTION_TYPE_FAMILY; false otherwise
*/
inline bool is_scalar_reduction(viennacl::scheduler::statement_node const & node)
inline bool is_scalar_reduction(scheduler::statement_node const & node)
{
return node.op.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE || node.op.type_family==viennacl::scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY;
return node.op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE || node.op.type_family==scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY;
}
/** @brief Tells whether a statement node represents a vector reduction.
*
* @param node  statement node whose operator (node.op) is inspected
* @return true when the operator type is OPERATION_BINARY_MAT_VEC_PROD_TYPE, or when the
*         operator's type family is either OPERATION_ROWS_REDUCTION_TYPE_FAMILY or
*         OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY; false otherwise
*/
inline bool is_vector_reduction(viennacl::scheduler::statement_node const & node)
inline bool is_vector_reduction(scheduler::statement_node const & node)
{
return node.op.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE
|| node.op.type_family==viennacl::scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY
|| node.op.type_family==viennacl::scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY;
return node.op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE
|| node.op.type_family==scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY
|| node.op.type_family==scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY;
}
/** @brief Follows the chain of left-hand-side children starting at array[root].
*
* As long as the current node's lhs belongs to COMPOSITE_OPERATION_FAMILY, the walk moves to
* array[lhs.node_index]. It stops at the first node whose lhs is not a composite operation.
*
* @param array  node container of a statement
* @param root   index of the node the walk starts from
* @return reference to the node where the walk stopped (an element of array)
*
* NOTE(review): root and every lhs.node_index are used to index array as-is; presumably the
* statement guarantees they are valid and that the chain terminates -- confirm against callers.
*/
inline viennacl::scheduler::statement_node const & lhs_most(viennacl::scheduler::statement::container_type const & array, size_t root)
inline scheduler::statement_node const & lhs_most(scheduler::statement::container_type const & array, size_t root)
{
viennacl::scheduler::statement_node const * current = &array[root];
while (current->lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
scheduler::statement_node const * current = &array[root];
while (current->lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
current = &array[current->lhs.node_index];
return *current;
}
@@ -162,38 +156,27 @@ namespace tools
{
template<class Fun>
inline void traverse(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, Fun const & fun, bool inspect);
inline void traverse(scheduler::statement const & statement, atidlas_int_t root_idx, Fun const & fun, bool inspect);
inline void process(tools::kernel_generation_stream & stream, leaf_t leaf, std::multimap<std::string, std::string> const & accessors,
viennacl::scheduler::statement const & statement, size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed);
inline std::string evaluate(leaf_t leaf, std::map<std::string, std::string> const & accessors, viennacl::scheduler::statement const & statement, atidlas_int_t root_idx,mapping_type const & mapping);
scheduler::statement const & statement, size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed);
inline std::string evaluate(leaf_t leaf, std::map<std::string, std::string> const & accessors, scheduler::statement const & statement, atidlas_int_t root_idx,mapping_type const & mapping);
}
using viennacl::scheduler::INT_TYPE;
using viennacl::scheduler::UINT_TYPE;
using viennacl::scheduler::ULONG_TYPE;
using viennacl::scheduler::LONG_TYPE;
using viennacl::scheduler::FLOAT_TYPE;
using viennacl::scheduler::DOUBLE_TYPE;
typedef cl_uint vendor_id_type;
typedef cl_device_type device_type;
typedef std::string device_name_type;
/** @brief Abstract interface associating memory-handle pointers with unsigned indices.
*
* Both operations are pure virtual. In the two implementations visible in this file
* (bind_to_handle and bind_all_unique):
*  - bind(ph) returns a bool; bind_to_handle returns true for a NULL pointer or for a pointer
*    that was not recorded before, bind_all_unique always returns true;
*  - get(ph) returns an unsigned index taken from a running counter (current_arg_), or, in
*    bind_to_handle, the index previously recorded for that pointer.
*/
class symbolic_binder
{
public:
// Virtual destructor: the class declares pure virtual members and is derived from in this file.
virtual ~symbolic_binder(){ }
virtual bool bind(viennacl::backend::mem_handle const * ph) = 0;
virtual unsigned int get(viennacl::backend::mem_handle const * ph) = 0;
virtual bool bind(cl::Buffer const * ph) = 0;
virtual unsigned int get(cl::Buffer const * ph) = 0;
};
class bind_to_handle : public symbolic_binder
{
public:
bind_to_handle() : current_arg_(0){ }
bool bind(viennacl::backend::mem_handle const * ph) {return (ph==NULL)?true:memory.insert(std::make_pair((void*)ph, current_arg_)).second; }
unsigned int get(viennacl::backend::mem_handle const * ph){ return bind(ph)?current_arg_++:memory.at((void*)ph); }
bool bind(cl::Buffer const * ph) {return (ph==NULL)?true:memory.insert(std::make_pair((void*)ph, current_arg_)).second; }
unsigned int get(cl::Buffer const * ph){ return bind(ph)?current_arg_++:memory.at((void*)ph); }
private:
unsigned int current_arg_;
std::map<void*,unsigned int> memory;
@@ -203,8 +186,8 @@ class bind_all_unique : public symbolic_binder
{
public:
bind_all_unique() : current_arg_(0){ }
bool bind(viennacl::backend::mem_handle const *) {return true; }
unsigned int get(viennacl::backend::mem_handle const *){ return current_arg_++; }
bool bind(cl::Buffer const *) {return true; }
unsigned int get(cl::Buffer const *){ return current_arg_++; }
private:
unsigned int current_arg_;
std::map<void*,unsigned int> memory;
@@ -226,8 +209,6 @@ inline tools::shared_ptr<symbolic_binder> make_binder(binding_policy_t policy)
/** @brief Empty struct template parameterized by a character value.
*
* Each value of C yields a distinct, member-less type.
*/
template<char C> struct char_to_type
{
};
typedef viennacl::device_specific::statements_container statements_container;
}
#endif

View File

@@ -3,9 +3,9 @@
#include <string>
#include "viennacl/scheduler/forwards.h"
#include "atidlas/forwards.h"
#include "atidlas/scheduler/forwards.h"
#include "atidlas/tools/find_and_replace.hpp"
#include "atidlas/backend/tools/misc.hpp"
@@ -63,10 +63,10 @@ protected:
public:
/** @brief Groups the three values that locate a node: a mapping, a statement and a root index.
*
* The constructor stores the two pointers and the index exactly as given; the pointees are
* not copied.
*/
struct node_info
{
node_info(mapping_type const * _mapping, viennacl::scheduler::statement const * _statement, atidlas_int_t _root_idx) :
node_info(mapping_type const * _mapping, scheduler::statement const * _statement, atidlas_int_t _root_idx) :
mapping(_mapping), statement(_statement), root_idx(_root_idx) { }
// Pointer to the mapping passed at construction.
mapping_type const * mapping;
// Pointer to the statement passed at construction.
viennacl::scheduler::statement const * statement;
scheduler::statement const * statement;
// Index passed at construction; used elsewhere in this file to index statement->array().
atidlas_int_t root_idx;
};
@@ -153,16 +153,16 @@ public:
mapped_reduction(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key) : mapped_object(scalartype, id, type_key), binary_leaf(info){ }
atidlas_int_t root_idx() const { return info_.root_idx; }
viennacl::scheduler::statement const & statement() const { return *info_.statement; }
viennacl::scheduler::statement_node root_node() const { return statement().array()[root_idx()]; }
scheduler::statement const & statement() const { return *info_.statement; }
scheduler::statement_node root_node() const { return statement().array()[root_idx()]; }
bool is_index_reduction() const { return tools::is_index_reduction(info_.statement->array()[info_.root_idx].op); }
/** @brief Returns a copy of the operator of the node at info_.root_idx, with product types rewritten.
*
* The operator is copied from info_.statement->array()[info_.root_idx].op. If its type is
* OPERATION_BINARY_MAT_VEC_PROD_TYPE or OPERATION_BINARY_INNER_PROD_TYPE, the copy's type is
* replaced by OPERATION_BINARY_ADD_TYPE; the statement itself is not modified.
*
* NOTE(review): presumably the products are accumulated by addition, hence the rewrite -- confirm.
*/
viennacl::scheduler::op_element root_op() const
scheduler::op_element root_op() const
{
viennacl::scheduler::op_element res = info_.statement->array()[info_.root_idx].op;
if (res.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE
||res.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE)
res.type = viennacl::scheduler::OPERATION_BINARY_ADD_TYPE;
scheduler::op_element res = info_.statement->array()[info_.root_idx].op;
if (res.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE
||res.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE)
res.type = scheduler::OPERATION_BINARY_ADD_TYPE;
return res;
}
};

View File

@@ -5,9 +5,7 @@
#include <vector>
#include "atidlas/backend/templates/template_base.hpp"
#include "viennacl/scheduler/forwards.h"
#include "viennacl/tools/tools.hpp"
#include "atidlas/scheduler/forwards.h"
namespace atidlas
{
@@ -28,7 +26,7 @@ public:
class matrix_axpy_template : public template_base_impl<matrix_axpy_template, matrix_axpy_parameters>
{
private:
int check_invalid_impl(viennacl::ocl::device const &, statements_container const &) const
int check_invalid_impl(cl::Device const &, statements_container const &) const
{
if (p_.simd_width>1)
return TEMPLATE_INVALID_SIMD_WIDTH;
@@ -99,14 +97,14 @@ public:
/** @brief Returns the two sizes reported by matrix_size() for the first statement.
*
* Only statements.data().front() is inspected. matrix_size() is applied to the node returned
* by lhs_most() for that statement's root, together with up_to_internal_size_.
*
* @param statements  container of statements; only its first element is read
* @return a two-element vector holding size.first followed by size.second
*/
std::vector<atidlas_int_t> input_sizes(statements_container const & statements)
{
viennacl::scheduler::statement const & statement = statements.data().front();
scheduler::statement const & statement = statements.data().front();
std::pair<atidlas_int_t, atidlas_int_t> size = matrix_size(lhs_most(statement.array(), statement.root()), up_to_internal_size_);
return tools::make_vector<atidlas_int_t>() << size.first << size.second;
}
void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, statements_container const & statements)
{
viennacl::ocl::kernel & kernel = programs[0].program().get_kernel(kernel_prefix);
cl::Kernel & kernel = programs[0].program().get_kernel(kernel_prefix);
kernel.local_work_size(0, p_.local_size_0);
kernel.local_work_size(1, p_.local_size_1);
@@ -119,7 +117,7 @@ public:
kernel.arg(current_arg++, cl_uint(MN[1]));
set_arguments(statements, kernel, current_arg);
viennacl::ocl::enqueue(kernel);
// viennacl::ocl::enqueue(kernel);
}

View File

@@ -4,14 +4,9 @@
#include <vector>
#include "viennacl/scheduler/forwards.h"
#include "viennacl/matrix_def.hpp"
#include "viennacl/matrix_proxy.hpp"
#include "viennacl/forwards.h"
#include "viennacl/tools/tools.hpp"
#include "atidlas/forwards.h"
#include "atidlas/backend/templates/template_base.hpp"
#include "atidlas/scheduler/forwards.h"
#include "atidlas/tools/align.hpp"
namespace atidlas
@@ -50,8 +45,8 @@ class matrix_product_template : public template_base_impl<matrix_product_templat
private:
unsigned int lmem_usage(statements_container const & statements) const
{
viennacl::scheduler::statement const & statement = statements.data().front();
viennacl::scheduler::statement_node_numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type;
scheduler::statement const & statement = statements.data().front();
scheduler::numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type;
unsigned int N = 0;
if (p_.A_fetching_policy==FETCH_FROM_LOCAL)
@@ -63,14 +58,14 @@ private:
/** @brief Computes (mS*nS + mS*kS + kS*nS) scaled by the element size of the first statement.
*
* The numeric type is read from the lhs of the node returned by lhs_most() for the root of
* statements.data().front(); other statements are not inspected.
*
* @param statements  container of statements; only its first element is read
* @return N * tools::size_of(numeric_type), with N = p_.mS*p_.nS + p_.mS*p_.kS + p_.kS*p_.nS
*
* NOTE(review): presumably tools::size_of returns a size in bytes -- confirm.
*/
unsigned int registers_usage(statements_container const & statements) const
{
viennacl::scheduler::statement const & statement = statements.data().front();
viennacl::scheduler::statement_node_numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type;
scheduler::statement const & statement = statements.data().front();
scheduler::numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type;
unsigned int N = p_.mS * p_.nS + p_.mS * p_.kS + p_.kS * p_.nS;
return N*tools::size_of(numeric_type);
}
int check_invalid_impl(viennacl::ocl::device const &, statements_container const &) const
int check_invalid_impl(cl::Device const &, statements_container const &) const
{
if (p_.A_fetching_policy!=FETCH_FROM_LOCAL && p_.B_fetching_policy!=FETCH_FROM_LOCAL&& (p_.local_fetch_0!=0 || p_.local_fetch_1!=0))
return TEMPLATE_GLOBAL_MEMORY_REQUIRES_ZERO_LOCAL_FETCH;
@@ -118,15 +113,15 @@ private:
return TEMPLATE_VALID;
}
static void parse(viennacl::scheduler::statement const & s,
static void parse(scheduler::statement const & s,
atidlas_int_t & C_idx, leaf_t & C_leaf, atidlas_int_t & alpha_idx, leaf_t & alpha_leaf,
atidlas_int_t & A_idx, leaf_t & A_leaf, bool& A_trans, atidlas_int_t & B_idx, leaf_t & B_leaf, bool& B_trans,
atidlas_int_t & beta_idx, leaf_t & beta_leaf)
{
using namespace tools;
using namespace viennacl::scheduler;
using namespace scheduler;
viennacl::scheduler::statement::container_type const & array = s.array();
scheduler::statement::container_type const & array = s.array();
atidlas_int_t root_idx = s.root();
C_idx = root_idx;
@@ -206,7 +201,7 @@ private:
/// INIT
/// //////////////
tools::kernel_generation_stream stream;
viennacl::scheduler::statement const & st = statements.data().front();
scheduler::statement const & st = statements.data().front();
mapping_type const & mapping = mappings.front();
bool A_trans = false, B_trans = false;
@@ -658,23 +653,23 @@ private:
}
template<class NumericT>
void enqueue_block(viennacl::scheduler::statement & statement, atidlas_int_t M, atidlas_int_t N, atidlas_int_t K,
viennacl::scheduler::lhs_rhs_element& eA, viennacl::scheduler::lhs_rhs_element& eB, viennacl::scheduler::lhs_rhs_element& eC, viennacl::scheduler::lhs_rhs_element& ebeta,
void enqueue_block(scheduler::statement & statement, atidlas_int_t M, atidlas_int_t N, atidlas_int_t K,
scheduler::lhs_rhs_element& eA, scheduler::lhs_rhs_element& eB, scheduler::lhs_rhs_element& eC, scheduler::lhs_rhs_element& ebeta,
viennacl::matrix_base<NumericT> const & A, viennacl::matrix_base<NumericT> const & B, viennacl::matrix_base<NumericT> const & C, NumericT beta,
std::vector<lazy_program_compiler> & programs, std::string const & kernel_prefix, int id)
{
if (A.size1()==0 || A.size2()==0 || B.size1()==0 || B.size2()==0 || C.size1()==0 || C.size2()==0)
return;
viennacl::ocl::kernel& kernel = programs[id].program().get_kernel(kernel_prefix);
cl::Kernel& kernel = programs[id].program().get_kernel(kernel_prefix);
kernel.local_work_size(0, p_.local_size_0);
kernel.local_work_size(1, p_.local_size_1);
viennacl::scheduler::statement::assign_element(eA, A);
viennacl::scheduler::statement::assign_element(eB, B);
viennacl::scheduler::statement::assign_element(eC, C);
viennacl::scheduler::statement::assign_element(ebeta, beta);
scheduler::statement::assign_element(eA, A);
scheduler::statement::assign_element(eB, B);
scheduler::statement::assign_element(eC, C);
scheduler::statement::assign_element(ebeta, beta);
if (id==1)
{
@@ -691,12 +686,12 @@ private:
kernel.arg(current_arg++, cl_uint(N));
kernel.arg(current_arg++, cl_uint(K));
set_arguments(statement, kernel, current_arg);
viennacl::ocl::enqueue(kernel);
// viennacl::ocl::enqueue(kernel);
}
template<class NumericT>
viennacl::matrix_slice< viennacl::matrix_base<NumericT> > create_slice(viennacl::matrix_base<NumericT>* viennacl::scheduler::lhs_rhs_element::*ptr, viennacl::scheduler::lhs_rhs_element const & element,
viennacl::matrix_slice< viennacl::matrix_base<NumericT> > create_slice(viennacl::matrix_base<NumericT>* scheduler::lhs_rhs_element::*ptr, scheduler::lhs_rhs_element const & element,
atidlas_int_t s0_0, atidlas_int_t s0_1, atidlas_int_t s1_0, atidlas_int_t s1_1, bool swap)
{
viennacl::matrix_base<NumericT> & M = *(element.*ptr);
@@ -708,20 +703,20 @@ private:
}
template<class NumericT>
void enqueue_impl(viennacl::matrix_base<NumericT>* viennacl::scheduler::lhs_rhs_element::*ptr_matrix,
viennacl::scheduler::statement & statement, atidlas_int_t M, atidlas_int_t N, atidlas_int_t K,
viennacl::scheduler::lhs_rhs_element & A, viennacl::scheduler::lhs_rhs_element & B, viennacl::scheduler::lhs_rhs_element & C, viennacl::scheduler::lhs_rhs_element & beta,
void enqueue_impl(viennacl::matrix_base<NumericT>* scheduler::lhs_rhs_element::*ptr_matrix,
scheduler::statement & statement, atidlas_int_t M, atidlas_int_t N, atidlas_int_t K,
scheduler::lhs_rhs_element & A, scheduler::lhs_rhs_element & B, scheduler::lhs_rhs_element & C, scheduler::lhs_rhs_element & beta,
NumericT beta_value, std::vector<lazy_program_compiler> & programs, std::string const & kernel_prefix)
{
using namespace tools;
std::string kernel_prefix_fb = kernel_prefix + "_fb";
atidlas_int_t ldstrideA = call_on_matrix(A, leading_stride_fun());
atidlas_int_t ldstrideB = call_on_matrix(B, leading_stride_fun());
atidlas_int_t ldstrideC = call_on_matrix(C, leading_stride_fun());
atidlas_int_t ldstartA = call_on_matrix(A, leading_start_fun());
atidlas_int_t ldstartB = call_on_matrix(B, leading_start_fun());
atidlas_int_t ldstrideA = traits::ldstride(*A.matrix);
atidlas_int_t ldstrideB = traits::ldstride(*B.matrix);
atidlas_int_t ldstrideC = traits::ldstride(*C.matrix);
atidlas_int_t ldstartA = traits::ldstart(*A.matrix);
atidlas_int_t ldstartB = traits::ldstart(*B.matrix);
bool swap_A = (A_trans_=='T');
bool swap_B = (B_trans_=='T');
@@ -735,9 +730,9 @@ private:
}
viennacl::scheduler::lhs_rhs_element Acopy = A;
viennacl::scheduler::lhs_rhs_element Bcopy = B;
viennacl::scheduler::lhs_rhs_element Ccopy = C;
scheduler::lhs_rhs_element Acopy = A;
scheduler::lhs_rhs_element Bcopy = B;
scheduler::lhs_rhs_element Ccopy = C;
atidlas_int_t lM = M / p_.mL * p_.mL;
atidlas_int_t lN = N / p_.nL * p_.nL;
@@ -764,14 +759,14 @@ private:
{
using namespace tools;
viennacl::scheduler::statement const & st = statements.data().front();
scheduler::statement const & st = statements.data().front();
parse(st, C_idx, C_leaf, alpha_idx, alpha_leaf, A_idx, A_leaf, A_trans, B_idx, B_leaf, B_trans, beta_idx, beta_leaf);
viennacl::scheduler::lhs_rhs_element const & A = tools::lhs_rhs_element(st, A_idx, A_leaf);
viennacl::scheduler::lhs_rhs_element const & C = tools::lhs_rhs_element(st, C_idx, C_leaf);
scheduler::lhs_rhs_element const & A = tools::lhs_rhs_element(st, A_idx, A_leaf);
scheduler::lhs_rhs_element const & C = tools::lhs_rhs_element(st, C_idx, C_leaf);
atidlas_int_t M = call_on_matrix(C, size1_fun());
atidlas_int_t N = call_on_matrix(C, size2_fun());
atidlas_int_t K = A_trans?call_on_matrix(A, size1_fun()):call_on_matrix(A, size2_fun());
atidlas_int_t M = traits::size1(*C.matrix);
atidlas_int_t N = traits::size2(*C.matrix);
atidlas_int_t K = A_trans?traits::size1(*A.matrix):traits::size2(*A.matrix);
return tools::make_vector<atidlas_int_t>() << M << N << K;
}
@@ -796,16 +791,16 @@ public:
leaf_t C_leaf=LHS_NODE_TYPE, A_leaf=LHS_NODE_TYPE, B_leaf=LHS_NODE_TYPE, alpha_leaf=LHS_NODE_TYPE, beta_leaf=LHS_NODE_TYPE;
std::vector<atidlas_int_t> MNK = infos(statements,A_trans,B_trans,C_idx,A_idx,B_idx,alpha_idx,beta_idx,C_leaf,A_leaf,B_leaf,alpha_leaf,beta_leaf);
viennacl::scheduler::statement stcopy = statements.data().front();
viennacl::scheduler::lhs_rhs_element& A = tools::lhs_rhs_element(stcopy, A_idx, A_leaf);
viennacl::scheduler::lhs_rhs_element& B = tools::lhs_rhs_element(stcopy, B_idx, B_leaf);
viennacl::scheduler::lhs_rhs_element& C = tools::lhs_rhs_element(stcopy, C_idx, C_leaf);
viennacl::scheduler::lhs_rhs_element& beta = tools::lhs_rhs_element(stcopy, beta_idx, beta_leaf);
scheduler::statement stcopy = statements.data().front();
scheduler::lhs_rhs_element& A = tools::lhs_rhs_element(stcopy, A_idx, A_leaf);
scheduler::lhs_rhs_element& B = tools::lhs_rhs_element(stcopy, B_idx, B_leaf);
scheduler::lhs_rhs_element& C = tools::lhs_rhs_element(stcopy, C_idx, C_leaf);
scheduler::lhs_rhs_element& beta = tools::lhs_rhs_element(stcopy, beta_idx, beta_leaf);
if (C.numeric_type==viennacl::scheduler::FLOAT_TYPE)
enqueue_impl<float>(&viennacl::scheduler::lhs_rhs_element::matrix_float, stcopy, MNK[0], MNK[1], MNK[2], A, B, C, beta, beta.host_float, programs, kernel_prefix);
else if (C.numeric_type==viennacl::scheduler::DOUBLE_TYPE)
enqueue_impl<double>(&viennacl::scheduler::lhs_rhs_element::matrix_double, stcopy, MNK[0], MNK[1], MNK[2], A, B, C, beta, beta.host_double, programs, kernel_prefix);
if (C.numeric_type==scheduler::FLOAT_TYPE)
enqueue_impl<float>(&scheduler::lhs_rhs_element::matrix_float, stcopy, MNK[0], MNK[1], MNK[2], A, B, C, beta, beta.host_float, programs, kernel_prefix);
else if (C.numeric_type==scheduler::DOUBLE_TYPE)
enqueue_impl<double>(&scheduler::lhs_rhs_element::matrix_double, stcopy, MNK[0], MNK[1], MNK[2], A, B, C, beta, beta.host_double, programs, kernel_prefix);
else
throw generator_not_supported_exception("GEMM only supported for float/double");

View File

@@ -4,10 +4,7 @@
#include <vector>
#include "viennacl/backend/opencl.hpp"
#include "viennacl/scheduler/forwards.h"
#include "viennacl/tools/tools.hpp"
#include "atidlas/scheduler/forwards.h"
#include "atidlas/backend/templates/template_base.hpp"
namespace atidlas
@@ -33,8 +30,8 @@ private:
unsigned int res = 0;
// Adds p_.local_size_0 * size_of(numeric type) to res once per statement in the container.
// NOTE(review): the loop advances `it` over every statement, but the body never reads `it`;
// it always inspects statements.data().front(). Every iteration therefore adds the same
// amount, computed from the first statement only. If the numeric type of each statement was
// meant to be used, the body should read `*it` instead -- confirm the intent.
for(statements_container::data_type::const_iterator it = statements.data().begin() ; it != statements.data().end() ; ++it)
{
viennacl::scheduler::statement const & statement = statements.data().front();
viennacl::scheduler::statement_node_numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type;
scheduler::statement const & statement = statements.data().front();
scheduler::numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type;
res += p_.local_size_0*tools::size_of(numeric_type);
}
return res;
@@ -161,7 +158,7 @@ private:
accessors["matrix_diag"] = str[a];
accessors["scalar"] = "#namereg";
std::string value = exprs[k]->evaluate_recursive(LHS_NODE_TYPE, accessors);
if (exprs[k]->root_node().op.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE)
if (exprs[k]->root_node().op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE)
value+= "*" + exprs[k]->evaluate_recursive(RHS_NODE_TYPE, accessors);
if (exprs[k]->is_index_reduction())
@@ -294,7 +291,7 @@ public:
{
std::vector<atidlas_int_t> size = input_sizes(statements);
std::vector<viennacl::scheduler::statement_node const *> reductions;
std::vector<scheduler::statement_node const *> reductions;
for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
{
std::vector<size_t> reductions_idx = tools::filter_nodes(&tools::is_reduction, *it, false);
@@ -302,7 +299,7 @@ public:
reductions.push_back(&it->array()[*itt]);
}
viennacl::scheduler::statement const & statement = statements.data().front();
scheduler::statement const & statement = statements.data().front();
unsigned int scalartype_size = tools::size_of(lhs_most(statement.array(), statement.root()).lhs.numeric_type);
viennacl::ocl::kernel * kernels[2];
@@ -329,7 +326,7 @@ public:
kernels[k]->arg(n_arg++, cl_uint(size[0]));
unsigned int i = 0;
unsigned int j = 0;
for (std::vector<viennacl::scheduler::statement_node const *>::const_iterator it = reductions.begin(); it != reductions.end(); ++it)
for (std::vector<scheduler::statement_node const *>::const_iterator it = reductions.begin(); it != reductions.end(); ++it)
{
if (tools::is_index_reduction((*it)->op))
{

View File

@@ -4,10 +4,8 @@
#include <vector>
#include "viennacl/scheduler/forwards.h"
#include "viennacl/tools/tools.hpp"
#include "viennacl/scheduler/io.hpp"
#include "atidlas/scheduler/forwards.h"
#include "atidlas/traits/size.hpp"
#include "atidlas/backend/templates/template_base.hpp"
namespace atidlas
@@ -27,7 +25,7 @@ struct row_wise_reduction_parameters : public template_base::parameters_type
class row_wise_reduction_template : public template_base_impl<row_wise_reduction_template, row_wise_reduction_parameters>
{
private:
virtual int check_invalid_impl(viennacl::ocl::device const &, statements_container const &) const
virtual int check_invalid_impl(cl::Device const &, statements_container const &) const
{
if (p_.fetch_policy==FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
@@ -39,7 +37,7 @@ private:
return p_.local_size_0*(p_.local_size_1+1);
}
static void parse(viennacl::scheduler::statement const & statement, std::vector<size_t> & idx, bool & is_trans, viennacl::scheduler::lhs_rhs_element & matrix)
static void parse(scheduler::statement const & statement, std::vector<size_t> & idx, bool & is_trans, scheduler::lhs_rhs_element & matrix)
{
idx = tools::filter_nodes(&tools::is_reduction, statement, false);
is_trans = is_node_trans(statement.array(), idx[0], LHS_NODE_TYPE);
@@ -125,7 +123,7 @@ private:
accessors["vector"] = str[a];
accessors["scalar"] = "#namereg";
std::string value = exprs[k]->evaluate_recursive(LHS_NODE_TYPE, accessors);
if (exprs[k]->root_node().op.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE)
if (exprs[k]->root_node().op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE)
value+= "*" + exprs[k]->evaluate_recursive(RHS_NODE_TYPE, accessors);
if (exprs[k]->is_index_reduction())
@@ -201,7 +199,7 @@ private:
for (mit = mappings.begin(), sit = statements.data().begin(); mit != mappings.end(); ++mit, ++sit)
{
std::vector<size_t> idx;
viennacl::scheduler::lhs_rhs_element A;
scheduler::lhs_rhs_element A;
parse(*sit, idx, is_trans, A);
for (unsigned int j = 0; j < idx.size(); ++j)
exprs.push_back((mapped_row_wise_reduction*)(mit->at(mapping_key(idx[j], PARENT_NODE_TYPE)).get()));
@@ -222,10 +220,10 @@ private:
std::vector<atidlas_int_t> infos(statements_container const & statements, bool & is_trans)
{
std::vector<size_t> idx;
viennacl::scheduler::lhs_rhs_element A;
scheduler::lhs_rhs_element A;
parse(statements.data().front(), idx, is_trans, A);
atidlas_int_t M = tools::call_on_matrix(A, tools::size1_fun());
atidlas_int_t N = tools::call_on_matrix(A, tools::size2_fun());
atidlas_int_t M = traits::size1(*A.matrix);
atidlas_int_t N = traits::size2(*A.matrix);
if(is_trans)
std::swap(M,N);
return tools::make_vector<atidlas_int_t>() << M << N;
@@ -245,7 +243,7 @@ public:
bool is_trans;
std::vector<atidlas_int_t> MN = infos(statements, is_trans);
viennacl::ocl::kernel * kernel;
cl::Kernel * kernel;
if(is_trans && p_.simd_width>1)
{
if (has_strided_access(statements))
@@ -264,7 +262,7 @@ public:
kernel->arg(current_arg++, cl_uint(MN[0]));
kernel->arg(current_arg++, cl_uint(MN[1]));
set_arguments(statements, *kernel, current_arg);
viennacl::ocl::enqueue(*kernel);
// cl::CommandQueue().enqueue()
}
};

View File

@@ -5,13 +5,7 @@
#include <list>
#include <set>
#include "viennacl/ocl/kernel.hpp"
#include "viennacl/ocl/device.hpp"
#include "viennacl/ocl/device_utils.hpp"
#include "viennacl/scheduler/forwards.h"
#include "viennacl/scheduler/io.hpp"
#include "atidlas/scheduler/forwards.h"
#include "atidlas/tools/lazy_program_compiler.hpp"
#include "atidlas/backend/templates/template_base.hpp"
#include "atidlas/backend/tools/misc.hpp"
@@ -45,94 +39,95 @@ private:
/** @brief Functor to map the statements to the types defined in mapped_objects.hpp */
class map_functor : public tools::traversal_functor
{
/** @brief Finds the numeric type associated with the node at root_idx.
*
* Starting from statement->array()[root_idx], the walk moves to array()[lhs.node_index] for
* as long as the current node's lhs numeric type equals INVALID_NUMERIC_TYPE, then returns
* the lhs numeric type of the node where it stopped.
*
* @param statement  statement whose node array is walked
* @param root_idx   index of the starting node
* @return the first lhs numeric type along the lhs chain that is not INVALID_NUMERIC_TYPE
*
* NOTE(review): the lines using `lhs.numeric_t`, an unqualified `numeric_type` return type and
* an unqualified `INVALID_NUMERIC_TYPE` differ from other code in this file, which reads
* `lhs.numeric_type` and spells the type `scheduler::numeric_type` -- confirm that the member
* and type names agree with atidlas/scheduler/forwards.h.
*/
viennacl::scheduler::statement_node_numeric_type numeric_type(viennacl::scheduler::statement const * statement, atidlas_int_t root_idx) const
numeric_type get_numeric_type(scheduler::statement const * statement, atidlas_int_t root_idx) const
{
viennacl::scheduler::statement_node const * root_node = &statement->array()[root_idx];
while (root_node->lhs.numeric_type==viennacl::scheduler::INVALID_NUMERIC_TYPE)
scheduler::statement_node const * root_node = &statement->array()[root_idx];
while (root_node->lhs.numeric_t==INVALID_NUMERIC_TYPE)
root_node = &statement->array()[root_node->lhs.node_index];
return root_node->lhs.numeric_type;
return root_node->lhs.numeric_t;
}
public:
typedef tools::shared_ptr<mapped_object> result_type;
map_functor(symbolic_binder & binder, mapping_type & mapping) : binder_(binder), mapping_(mapping){ }
/** @brief Binary leaf: builds a mapped object of type T for the node at root_idx.
*
* T is constructed from three arguments: the string form of the node's numeric type
* (tools::numeric_type_to_string), the index returned by binder_.get(NULL), and a
* mapped_object::node_info built from (mapping, statement, root_idx). The new object is
* returned wrapped in a shared pointer to mapped_object.
*
* With the binders defined in this file, get(NULL) returns the current counter value and
* increments it, so each call receives its own index.
*
* @tparam T          concrete mapped type to instantiate
* @param statement   statement containing the node
* @param root_idx    index of the node within the statement
* @param mapping     mapping forwarded to the node_info
*/
template<class T>
result_type binary_leaf(viennacl::scheduler::statement const * statement, atidlas_int_t root_idx, mapping_type const * mapping) const
tools::shared_ptr<mapped_object> binary_leaf(scheduler::statement const * statement, atidlas_int_t root_idx, mapping_type const * mapping) const
{
return result_type(new T(tools::numeric_type_to_string(numeric_type(statement,root_idx)), binder_.get(NULL), mapped_object::node_info(mapping, statement, root_idx)));
return tools::shared_ptr<mapped_object>(new T(tools::numeric_type_to_string(get_numeric_type(statement,root_idx)), binder_.get(NULL), mapped_object::node_info(mapping, statement, root_idx)));
}
template<class NumericT>
result_type operator()(NumericT const & /*scalar*/) const
{
return result_type(new mapped_host_scalar(tools::type_to_string<NumericT>::value(), binder_.get(NULL)));
}
// template<class NumericT>
// tools::shared_ptr<mapped_object> operator()(NumericT const & /*scalar*/) const
// {
// return tools::shared_ptr<mapped_object>(new mapped_host_scalar(tools::type_to_string<NumericT>::value(), binder_.get(NULL)));
// }
/** @brief Scalar mapping */
template<class NumericT>
result_type operator()(viennacl::scalar<NumericT> const & scal) const
{
return result_type(new mapped_scalar(tools::type_to_string<NumericT>::value(), binder_.get(&viennacl::traits::handle(scal))));
}
// /** @brief Scalar mapping */
// template<class NumericT>
// tools::shared_ptr<mapped_object> operator()(viennacl::scalar<NumericT> const & scal) const
// {
// return tools::shared_ptr<mapped_object>(new mapped_scalar(tools::type_to_string<NumericT>::value(), binder_.get(&viennacl::traits::handle(scal))));
// }
/** @brief Vector mapping */
template<class NumericT>
result_type operator()(viennacl::vector_base<NumericT> const & vec) const
tools::shared_ptr<mapped_object> create_vector(vector_base const & vector) const
{ return tools::shared_ptr<mapped_object>(new mapped_vector(tools::numeric_type_to_string(vector.dtype()), binder_.get(&vector.data()))); }
// /** @brief Implicit vector mapping */
// template<class NumericT>
// tools::shared_ptr<mapped_object> operator()(viennacl::implicit_vector_base<NumericT> const & /*vec*/) const
// {
// return tools::shared_ptr<mapped_object>(new mapped_implicit_vector(tools::type_to_string<NumericT>::value(), binder_.get(NULL)));
// }
// /** @brief Matrix mapping */
// template<class NumericT>
// tools::shared_ptr<mapped_object> operator()(viennacl::matrix_base<NumericT> const & mat) const
// {
// return tools::shared_ptr<mapped_object>(new mapped_matrix(tools::type_to_string<NumericT>::value(), binder_.get(&viennacl::traits::handle(mat))));
// }
// /** @brief Implicit matrix mapping */
// template<class NumericT>
// tools::shared_ptr<mapped_object> operator()(viennacl::implicit_matrix_base<NumericT> const & /*mat*/) const
// {
// return tools::shared_ptr<mapped_object>(new mapped_implicit_matrix(tools::type_to_string<NumericT>::value(), binder_.get(NULL)));
// }
tools::shared_ptr<mapped_object> create(scheduler::lhs_rhs_element const & lhs_rhs) const
{
return result_type(new mapped_vector(tools::type_to_string<NumericT>::value(), binder_.get(&viennacl::traits::handle(vec))));
// if(lhs_rhs.subtype==scheduler::DENSE_VECTOR_TYPE)
return create_vector(*lhs_rhs.vector);
}
/** @brief Implicit vector mapping */
template<class NumericT>
result_type operator()(viennacl::implicit_vector_base<NumericT> const & /*vec*/) const
{
return result_type(new mapped_implicit_vector(tools::type_to_string<NumericT>::value(), binder_.get(NULL)));
}
public:
/** @brief Matrix mapping */
template<class NumericT>
result_type operator()(viennacl::matrix_base<NumericT> const & mat) const
{
return result_type(new mapped_matrix(tools::type_to_string<NumericT>::value(), binder_.get(&viennacl::traits::handle(mat))));
}
/** @brief Implicit matrix mapping */
template<class NumericT>
result_type operator()(viennacl::implicit_matrix_base<NumericT> const & /*mat*/) const
{
return result_type(new mapped_implicit_matrix(tools::type_to_string<NumericT>::value(), binder_.get(NULL)));
}
map_functor(symbolic_binder & binder, mapping_type & mapping) : binder_(binder), mapping_(mapping){ }
/** @brief Traversal functor */
void operator()(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const {
void operator()(scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const {
mapping_type::key_type key(root_idx, leaf_t);
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
scheduler::statement_node const & root_node = statement.array()[root_idx];
if (leaf_t == LHS_NODE_TYPE && root_node.lhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
mapping_.insert(mapping_type::value_type(key, tools::call_on_element(root_node.lhs, *this)));
else if (leaf_t == RHS_NODE_TYPE && root_node.rhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
mapping_.insert(mapping_type::value_type(key, tools::call_on_element(root_node.rhs, *this)));
if (leaf_t == LHS_NODE_TYPE && root_node.lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
mapping_.insert(mapping_type::value_type(key, create(root_node.lhs)));
else if (leaf_t == RHS_NODE_TYPE && root_node.rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
mapping_.insert(mapping_type::value_type(key, create(root_node.rhs)));
else if ( leaf_t== PARENT_NODE_TYPE)
{
if (root_node.op.type==viennacl::scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE)
if (root_node.op.type==scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_vector_diag>(&statement, root_idx, &mapping_)));
else if (root_node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_DIAG_TYPE)
else if (root_node.op.type==scheduler::OPERATION_BINARY_MATRIX_DIAG_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_diag>(&statement, root_idx, &mapping_)));
else if (root_node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_ROW_TYPE)
else if (root_node.op.type==scheduler::OPERATION_BINARY_MATRIX_ROW_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_row>(&statement, root_idx, &mapping_)));
else if (root_node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_COLUMN_TYPE)
else if (root_node.op.type==scheduler::OPERATION_BINARY_MATRIX_COLUMN_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_column>(&statement, root_idx, &mapping_)));
else if (is_scalar_reduction(root_node))
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_scalar_reduction>(&statement, root_idx, &mapping_)));
else if (is_vector_reduction(root_node))
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_row_wise_reduction>(&statement, root_idx, &mapping_)));
else if (root_node.op.type == viennacl::scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE)
else if (root_node.op.type == scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_product>(&statement, root_idx, &mapping_)));
else if (root_node.op.type == viennacl::scheduler::OPERATION_UNARY_TRANS_TYPE)
else if (root_node.op.type == scheduler::OPERATION_UNARY_TRANS_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_trans>(&statement, root_idx, &mapping_)));
}
}
@@ -148,86 +143,91 @@ private:
public:
typedef void result_type;
/** @brief Binds the functor to the binder, to the caller's running argument index and to the kernel whose arguments are set
 *
 * @param binder      tells whether a given buffer still has to be passed to the kernel
 * @param current_arg index of the next argument to set; held by reference and advanced in place
 * @param kernel      OpenCL kernel receiving the arguments
 */
set_arguments_functor(symbolic_binder & binder, unsigned int & current_arg, cl::Kernel & kernel) : binder_(binder), current_arg_(current_arg), kernel_(kernel){ }
/** @brief Host scalar: passes the value itself, converted to the OpenCL type associated with NumericT
 *
 * Always consumes exactly one argument slot.
 *
 * NOTE(review): kernel_.arg() and viennacl::result_of::cl_type are ViennaCL APIs, whereas
 * set_vector_arguments() uses cl::Kernel::setArg() — confirm which kernel type this overload targets.
 */
template<class NumericT>
result_type operator()(NumericT const & scal) const
{
typedef typename viennacl::result_of::cl_type<NumericT>::type cl_scalartype;
kernel_.arg(current_arg_++, cl_scalartype(scal));
}
// template<class NumericT>
// void operator()(NumericT const & scal) const
// {
// typedef typename viennacl::result_of::cl_type<NumericT>::type cl_scalartype;
// kernel_.arg(current_arg_++, cl_scalartype(scal));
// }
/** @brief Scalar mapping
 *
 * Passes the scalar's OpenCL handle as one kernel argument, but only when
 * binder_.bind() returns true for its memory handle; otherwise no argument slot is consumed.
 *
 * NOTE(review): kernel_.arg() is the ViennaCL kernel API, whereas set_vector_arguments()
 * uses cl::Kernel::setArg() — confirm which kernel type this overload targets.
 */
template<class NumericT>
result_type operator()(viennacl::scalar<NumericT> const & scal) const
{
if (binder_.bind(&viennacl::traits::handle(scal)))
kernel_.arg(current_arg_++, scal.handle().opencl_handle());
}
// /** @brief Scalar mapping */
// template<class NumericT>
// void operator()(viennacl::scalar<NumericT> const & scal) const
// {
// if (binder_.bind(&viennacl::traits::handle(scal)))
// kernel_.arg(current_arg_++, scal.handle().opencl_handle());
// }
/** @brief Vector mapping */
template<class NumericT>
result_type operator()(viennacl::vector_base<NumericT> const & vec) const
void set_vector_arguments(vector_base const & v) const
{
if (binder_.bind(&viennacl::traits::handle(vec)))
if (binder_.bind(&v.data()))
{
kernel_.arg(current_arg_++, vec.handle().opencl_handle());
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start(vec)));
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride(vec)));
kernel_.setArg(current_arg_++, v.data());
kernel_.setArg(current_arg_++, cl_uint(v.start()));
kernel_.setArg(current_arg_++, cl_uint(v.stride()));
}
}
/** @brief Implicit vector mapping
 *
 * Passes the constant value of the implicit vector (converted to the OpenCL
 * type associated with NumericT) and, when vec.has_index() is true, its index
 * as an additional cl_uint argument.
 *
 * NOTE(review): kernel_.arg() is the ViennaCL kernel API, whereas set_vector_arguments()
 * uses cl::Kernel::setArg() — confirm which kernel type this overload targets.
 */
template<class NumericT>
result_type operator()(viennacl::implicit_vector_base<NumericT> const & vec) const
{
typedef typename viennacl::result_of::cl_type<NumericT>::type cl_scalartype;
kernel_.arg(current_arg_++, cl_scalartype(vec.value()));
if (vec.has_index())
kernel_.arg(current_arg_++, cl_uint(vec.index()));
}
// /** @brief Implicit vector mapping */
// template<class NumericT>
// void operator()(viennacl::implicit_vector_base<NumericT> const & vec) const
// {
// typedef typename viennacl::result_of::cl_type<NumericT>::type cl_scalartype;
// kernel_.arg(current_arg_++, cl_scalartype(vec.value()));
// if (vec.has_index())
// kernel_.arg(current_arg_++, cl_uint(vec.index()));
// }
/** @brief Matrix mapping
 *
 * When binder_.bind() returns true for the matrix's memory handle, passes six
 * consecutive arguments: the OpenCL handle, then ld, start1, start2, stride1
 * and stride2 as cl_uint. Otherwise no argument slot is consumed.
 *
 * NOTE(review): kernel_.arg() and viennacl::traits are ViennaCL APIs, whereas
 * set_vector_arguments() uses cl::Kernel::setArg() — confirm which kernel type this overload targets.
 */
template<class NumericT>
result_type operator()(viennacl::matrix_base<NumericT> const & mat) const
{
if (binder_.bind(&viennacl::traits::handle(mat)))
{
kernel_.arg(current_arg_++, mat.handle().opencl_handle());
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::ld(mat)));
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start1(mat)));
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start2(mat)));
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride1(mat)));
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride2(mat)));
}
}
// /** @brief Matrix mapping */
// template<class NumericT>
// void operator()(viennacl::matrix_base<NumericT> const & mat) const
// {
// if (binder_.bind(&viennacl::traits::handle(mat)))
// {
// kernel_.arg(current_arg_++, mat.handle().opencl_handle());
// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::ld(mat)));
// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start1(mat)));
// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start2(mat)));
// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride1(mat)));
// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride2(mat)));
// }
// }
/** @brief Implicit matrix mapping */
template<class NumericT>
result_type operator()(viennacl::implicit_matrix_base<NumericT> const & mat) const
// /** @brief Implicit matrix mapping */
// template<class NumericT>
// void operator()(viennacl::implicit_matrix_base<NumericT> const & mat) const
// {
// kernel_.arg(current_arg_++, typename viennacl::result_of::cl_type<NumericT>::type(mat.value()));
// }
void set_arguments(scheduler::lhs_rhs_element const & lhs_rhs) const
{
kernel_.arg(current_arg_++, typename viennacl::result_of::cl_type<NumericT>::type(mat.value()));
// if(lhs_rhs.subtype==scheduler::DENSE_VECTOR_TYPE)
set_vector_arguments(*lhs_rhs.vector);
}
/** @brief Traversal functor: */
void operator()(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const
void operator()(scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const
{
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
tools::call_on_element(root_node.lhs, *this);
else if (leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
tools::call_on_element(root_node.rhs, *this);
scheduler::statement_node const & root_node = statement.array()[root_idx];
if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
set_arguments(root_node.lhs);
else if (leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
set_arguments(root_node.rhs);
}
private:
symbolic_binder & binder_;     // queried through bind() before a buffer is passed to the kernel
unsigned int & current_arg_;   // index of the next kernel argument; refers to the caller's counter and is advanced in place
cl::Kernel & kernel_;          // kernel whose arguments are being set
};
protected:
static inline void compute_reduction(tools::kernel_generation_stream & os, std::string acc, std::string cur, viennacl::scheduler::op_element const & op)
static inline void compute_reduction(tools::kernel_generation_stream & os, std::string acc, std::string cur, scheduler::op_element const & op)
{
if (tools::elementwise_function(op))
os << acc << "=" << tools::evaluate(op.type) << "(" << acc << "," << cur << ");" << std::endl;
@@ -235,15 +235,15 @@ protected:
os << acc << "= (" << acc << ")" << tools::evaluate(op.type) << "(" << cur << ");" << std::endl;
}
static inline void compute_index_reduction(tools::kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, viennacl::scheduler::op_element const & op)
static inline void compute_index_reduction(tools::kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, scheduler::op_element const & op)
{
// os << acc << " = " << cur_value << ">" << acc_value << "?" << cur << ":" << acc << ";" << std::endl;
os << acc << "= select(" << acc << "," << cur << "," << cur_value << ">" << acc_value << ");" << std::endl;
os << acc_value << "=";
if (op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE) os << "fmax";
if (op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE) os << "max";
if (op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE) os << "fmin";
if (op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE) os << "min";
if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE) os << "fmax";
if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE) os << "max";
if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE) os << "fmin";
if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE) os << "min";
os << "(" << acc_value << "," << cur_value << ");"<< std::endl;
}
@@ -269,27 +269,27 @@ protected:
}
}
static inline std::string neutral_element(viennacl::scheduler::op_element const & op)
static inline std::string neutral_element(scheduler::op_element const & op)
{
switch (op.type)
{
case viennacl::scheduler::OPERATION_BINARY_ADD_TYPE : return "0";
case viennacl::scheduler::OPERATION_BINARY_MULT_TYPE : return "1";
case viennacl::scheduler::OPERATION_BINARY_DIV_TYPE : return "1";
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_FMAX_TYPE : return "-INFINITY";
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE : return "-INFINITY";
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_MAX_TYPE : return "-INFINITY";
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE : return "-INFINITY";
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_FMIN_TYPE : return "INFINITY";
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE : return "INFINITY";
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_MIN_TYPE : return "INFINITY";
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE : return "INFINITY";
case scheduler::OPERATION_BINARY_ADD_TYPE : return "0";
case scheduler::OPERATION_BINARY_MULT_TYPE : return "1";
case scheduler::OPERATION_BINARY_DIV_TYPE : return "1";
case scheduler::OPERATION_BINARY_ELEMENT_FMAX_TYPE : return "-INFINITY";
case scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE : return "-INFINITY";
case scheduler::OPERATION_BINARY_ELEMENT_MAX_TYPE : return "-INFINITY";
case scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE : return "-INFINITY";
case scheduler::OPERATION_BINARY_ELEMENT_FMIN_TYPE : return "INFINITY";
case scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE : return "INFINITY";
case scheduler::OPERATION_BINARY_ELEMENT_MIN_TYPE : return "INFINITY";
case scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE : return "INFINITY";
default: throw generator_not_supported_exception("Unsupported reduction operator : no neutral element known");
}
}
static std::string generate_arguments(std::vector<mapping_type> const & mappings, std::multimap<std::string, std::string> const & accessors, statements_container const & statements)
static std::string generate_arguments(std::vector<mapping_type> const & mappings, std::multimap<std::string, std::string> const & accessors, scheduler::statements_container const & statements)
{
tools::kernel_generation_stream stream;
tools::process(stream, PARENT_NODE_TYPE, accessors, statements, mappings);
@@ -308,7 +308,7 @@ protected:
return "__global " + data_type + "* #pointer, uint #start, uint #stride,";
}
static std::string generate_arguments(std::string const & data_type, std::vector<mapping_type> const & mappings, statements_container const & statements)
static std::string generate_arguments(std::string const & data_type, std::vector<mapping_type> const & mappings, scheduler::statements_container const & statements)
{
return generate_arguments(mappings, tools::create_process_accessors("scalar", "__global #scalartype* #pointer,")
("host_scalar", "#scalartype #name,")
@@ -320,10 +320,10 @@ protected:
void set_arguments(statements_container const & statements, viennacl::ocl::kernel & kernel, unsigned int & current_arg)
void set_arguments(scheduler::statements_container const & statements, cl::Kernel & kernel, unsigned int & current_arg)
{
tools::shared_ptr<symbolic_binder> binder = make_binder(binding_policy_);
for (statements_container::data_type::const_iterator itt = statements.data().begin(); itt != statements.data().end(); ++itt)
for (scheduler::statements_container::data_type::const_iterator itt = statements.data().begin(); itt != statements.data().end(); ++itt)
tools::traverse(*itt, itt->root(), set_arguments_functor(*binder, current_arg, kernel), true);
}
@@ -364,18 +364,18 @@ protected:
}
}
static bool is_node_trans(viennacl::scheduler::statement::container_type const & array, size_t root_idx, leaf_t leaf_type)
static bool is_node_trans(scheduler::statement::container_type const & array, size_t root_idx, leaf_t leaf_type)
{
bool res = false;
viennacl::scheduler::lhs_rhs_element viennacl::scheduler::statement_node::*ptr;
scheduler::lhs_rhs_element scheduler::statement_node::*ptr;
if (leaf_type==LHS_NODE_TYPE)
ptr = &viennacl::scheduler::statement_node::lhs;
ptr = &scheduler::statement_node::lhs;
else
ptr = &viennacl::scheduler::statement_node::rhs;
viennacl::scheduler::statement_node const * node = &array[root_idx];
while ((node->*ptr).type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
ptr = &scheduler::statement_node::rhs;
scheduler::statement_node const * node = &array[root_idx];
while ((node->*ptr).type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
{
if (array[(node->*ptr).node_index].op.type==viennacl::scheduler::OPERATION_UNARY_TRANS_TYPE)
if (array[(node->*ptr).node_index].op.type==scheduler::OPERATION_UNARY_TRANS_TYPE)
res = !res;
node = &array[(node->*ptr).node_index];
}
@@ -392,28 +392,28 @@ protected:
return str + tools::to_string(suffixes[i]);
}
static bool is_offset_modifier(viennacl::scheduler::statement_node const & node)
static bool is_offset_modifier(scheduler::statement_node const & node)
{
return node.op.type==viennacl::scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE
|| node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_DIAG_TYPE
|| node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_ROW_TYPE
|| node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_COLUMN_TYPE;
return node.op.type==scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE
|| node.op.type==scheduler::OPERATION_BINARY_MATRIX_DIAG_TYPE
|| node.op.type==scheduler::OPERATION_BINARY_MATRIX_ROW_TYPE
|| node.op.type==scheduler::OPERATION_BINARY_MATRIX_COLUMN_TYPE;
}
static bool has_strided_access(statements_container const & statements)
static bool has_strided_access(scheduler::statements_container const & statements)
{
for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
for (scheduler::statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
{
//checks for vectors
std::vector<viennacl::scheduler::lhs_rhs_element> vectors = tools::filter_elements(viennacl::scheduler::DENSE_VECTOR_TYPE, *it);
for (std::vector<viennacl::scheduler::lhs_rhs_element>::iterator itt = vectors.begin(); itt != vectors.end(); ++itt)
if (tools::call_on_vector(*itt, tools::stride_fun())>1)
std::vector<scheduler::lhs_rhs_element> vectors = tools::filter_elements(scheduler::DENSE_VECTOR_TYPE, *it);
for (std::vector<scheduler::lhs_rhs_element>::iterator itt = vectors.begin(); itt != vectors.end(); ++itt)
if(itt->vector->stride())
return true;
//checks for matrix
std::vector<viennacl::scheduler::lhs_rhs_element> matrices = tools::filter_elements(viennacl::scheduler::DENSE_MATRIX_TYPE, *it);
for (std::vector<viennacl::scheduler::lhs_rhs_element>::iterator itt = matrices.begin(); itt != matrices.end(); ++itt)
if (tools::call_on_matrix(*itt, tools::stride1_fun())>1 || tools::call_on_matrix(*itt, tools::stride2_fun())>2)
std::vector<scheduler::lhs_rhs_element> matrices = tools::filter_elements(scheduler::DENSE_MATRIX_TYPE, *it);
for (std::vector<scheduler::lhs_rhs_element>::iterator itt = matrices.begin(); itt != matrices.end(); ++itt)
if (itt->matrix->stride1() > 1 || itt->matrix->stride2() > 1)
return true;
if(tools::filter_nodes(&is_offset_modifier, *it, true).empty()==false)
@@ -422,42 +422,42 @@ protected:
return false;
}
/** @brief Returns the length of the vector produced by the left-hand side of a node
 *
 *  - matrix diagonal : the smaller of the two matrix dimensions;
 *  - matrix row      : the second matrix dimension;
 *  - matrix column   : the first matrix dimension;
 *  - anything else   : the size of the left-hand side vector.
 *
 * @param node                node whose lhs is measured
 * @param up_to_internal_size when true the internal_size accessors are used instead of the size accessors
 */
static atidlas_int_t vector_size(scheduler::statement_node const & node, bool up_to_internal_size)
{
  using namespace scheduler;
  using namespace tools;

  // Select the size accessors once, so that each case below is a single call.
  atidlas_int_t (vector_base::*funsize)(void) const = up_to_internal_size?&vector_base::internal_size:&vector_base::size;
  atidlas_int_t (matrix_base::*funsize1)(void) const = up_to_internal_size?&matrix_base::internal_size1:&matrix_base::size1;
  atidlas_int_t (matrix_base::*funsize2)(void) const = up_to_internal_size?&matrix_base::internal_size2:&matrix_base::size2;

  if (node.op.type==OPERATION_BINARY_MATRIX_DIAG_TYPE)
    return std::min<atidlas_int_t>((node.lhs.matrix->*funsize1)(), (node.lhs.matrix->*funsize2)());
  else if (node.op.type==OPERATION_BINARY_MATRIX_ROW_TYPE)
    return (node.lhs.matrix->*funsize2)();
  else if (node.op.type==OPERATION_BINARY_MATRIX_COLUMN_TYPE)
    return (node.lhs.matrix->*funsize1)();
  else
    return (node.lhs.vector->*funsize)();
}
static std::pair<atidlas_int_t, atidlas_int_t> matrix_size(viennacl::scheduler::statement_node const & node, bool up_to_internal_size)
static std::pair<atidlas_int_t, atidlas_int_t> matrix_size(scheduler::statement_node const & node, bool up_to_internal_size)
{
using namespace tools;
if (node.op.type==viennacl::scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE)
atidlas_int_t (vector_base::*funsize)() const = up_to_internal_size?&vector_base::internal_size:&vector_base::size;
atidlas_int_t (matrix_base::*funsize1)() const = up_to_internal_size?&matrix_base::internal_size1:&matrix_base::size1;
atidlas_int_t (matrix_base::*funsize2)() const = up_to_internal_size?&matrix_base::internal_size2:&matrix_base::size2;
if (node.op.type==scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE)
{
atidlas_int_t is = call_on_vector(node.lhs, internal_size_fun());
atidlas_int_t s = call_on_vector(node.lhs, size_fun());
return up_to_internal_size?std::make_pair(is,is):std::make_pair(s,s);
atidlas_int_t size = (node.lhs.vector->*funsize)();
return std::make_pair(size,size);
}
else
{
atidlas_int_t size1 = up_to_internal_size?call_on_matrix(node.lhs, internal_size1_fun()):call_on_matrix(node.lhs, size1_fun());
atidlas_int_t size2 = up_to_internal_size?call_on_matrix(node.lhs, internal_size2_fun()):call_on_matrix(node.lhs, size2_fun());
return std::make_pair(size1, size2);
}
return std::make_pair((node.lhs.matrix->*funsize1)(), (node.lhs.matrix->*funsize2)());
}
//NB : templates are not used here because declaring a functor out of the generate() functions would be harder to read
struct loop_body_base
{
virtual void operator()(tools::kernel_generation_stream & stream, unsigned int simd_width) const = 0;
@@ -507,22 +507,22 @@ protected:
private:
/** @brief Generates the body of the associated kernel function
 *
 * @param kernel_prefix prefix of the generated kernel names
 * @param statements    statements to generate code for
 * @param mapping       one symbolic mapping per statement
 * @return the generated sources
 */
virtual std::vector<std::string> generate_impl(std::string const & kernel_prefix, scheduler::statements_container const & statements, std::vector<mapping_type> const & mapping) const = 0;
public:
/** @brief Stores the binding policy later handed to make_binder() */
template_base(binding_policy_t binding_policy) : binding_policy_(binding_policy) {}

/** @brief Local memory required for the given statements; compared against the device's local memory size in check_invalid(). This default reports 0. */
virtual unsigned int lmem_usage(scheduler::statements_container const &) const { return 0; }

/** @brief Registers required for the given statements. This default reports 0. */
virtual unsigned int registers_usage(scheduler::statements_container const &) const { return 0; }

/** @brief Sizes that characterize the input of the template for the given statements */
virtual std::vector<atidlas_int_t> input_sizes(scheduler::statements_container const & statements) = 0;

virtual ~template_base(){ }
std::vector<std::string> generate(std::string const & kernel_prefix, statements_container const & statements, viennacl::ocl::device const & device)
std::vector<std::string> generate(std::string const & kernel_prefix, scheduler::statements_container const & statements, cl::Device const & device)
{
statements_container::data_type::const_iterator sit;
scheduler::statements_container::data_type::const_iterator sit;
std::vector<mapping_type>::iterator mit;
if(int err = check_invalid(statements, device))
@@ -538,9 +538,9 @@ public:
}
/** @brief returns whether or not the profile has undefined behavior on particular device
 *
 * generate() treats a non-zero return value as an error.
 */
virtual int check_invalid(scheduler::statements_container const & statements, cl::Device const & device) const = 0;

/** @brief Runs the kernels generated under kernel_prefix for the given statements */
virtual void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, scheduler::statements_container const & statements) = 0;

/** @brief Returns a copy of this template */
virtual tools::shared_ptr<template_base> clone() const = 0;
@@ -553,23 +553,23 @@ template<class TemplateType, class ParametersType>
class template_base_impl : public template_base
{
private:
/** @brief Template-specific validity hook; this default accepts every device/statements pair */
virtual int check_invalid_impl(cl::Device const &, scheduler::statements_container const &) const { return TEMPLATE_VALID; }
protected:
bool has_misaligned_offset(statements_container const & statements)
bool has_misaligned_offset(scheduler::statements_container const & statements)
{
for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
for (scheduler::statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
{
//checks for vectors
std::vector<viennacl::scheduler::lhs_rhs_element> vectors = tools::filter_elements(viennacl::scheduler::DENSE_VECTOR_TYPE, *it);
for (std::vector<viennacl::scheduler::lhs_rhs_element>::iterator itt = vectors.begin(); itt != vectors.end(); ++itt)
if (tools::call_on_vector(*itt, tools::stride_fun())>1)
std::vector<scheduler::lhs_rhs_element> vectors = tools::filter_elements(scheduler::DENSE_VECTOR_TYPE, *it);
for (std::vector<scheduler::lhs_rhs_element>::iterator itt = vectors.begin(); itt != vectors.end(); ++itt)
if (itt->vector->stride()>1)
return true;
//checks for matrix
std::vector<viennacl::scheduler::lhs_rhs_element> matrices = tools::filter_elements(viennacl::scheduler::DENSE_MATRIX_TYPE, *it);
for (std::vector<viennacl::scheduler::lhs_rhs_element>::iterator itt = matrices.begin(); itt != matrices.end(); ++itt)
if (tools::call_on_matrix(*itt, tools::stride1_fun())>1 || tools::call_on_matrix(*itt, tools::stride2_fun())>2)
std::vector<scheduler::lhs_rhs_element> matrices = tools::filter_elements(scheduler::DENSE_MATRIX_TYPE, *it);
for (std::vector<scheduler::lhs_rhs_element>::iterator itt = matrices.begin(); itt != matrices.end(); ++itt)
if (itt->matrix->stride1()>1 || itt->matrix->stride2()>1)
return true;
}
return false;
@@ -588,19 +588,17 @@ public:
{ return tools::shared_ptr<template_base>(new TemplateType(*dynamic_cast<TemplateType const *>(this))); }
/** @brief returns whether or not the profile has undefined behavior on particular device */
int check_invalid(statements_container const & statements, viennacl::ocl::device const & device) const
int check_invalid(scheduler::statements_container const & statements, cl::Device const & device) const
{
using namespace viennacl::tools;
//Query device informations
size_t lmem_available = static_cast<size_t>(device.local_mem_size());
size_t lmem_available = device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
size_t lmem_used = lmem_usage(statements);
if (lmem_used>lmem_available)
return TEMPLATE_LOCAL_MEMORY_OVERFLOW;
//Invalid work group size
size_t max_workgroup_size = device.max_work_group_size();
std::vector<size_t> max_work_item_sizes = device.max_work_item_sizes();
size_t max_workgroup_size = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
std::vector<size_t> max_work_item_sizes = device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
if (p_.local_size_0*p_.local_size_1 > max_workgroup_size)
return TEMPLATE_WORK_GROUP_SIZE_OVERFLOW;
if (p_.local_size_0 > max_work_item_sizes[0])
@@ -611,12 +609,12 @@ public:
//Advice from the Intel guide
unsigned int warp_size = 8;
if (device.type()==CL_DEVICE_TYPE_GPU)
if (device.getInfo<CL_DEVICE_TYPE>()==CL_DEVICE_TYPE_GPU)
{
//Advice from the nvidia guide
warp_size = 32;
//Advice from the AMD guide
if (device.vendor_id()==4098)
if (device.getInfo<CL_DEVICE_VENDOR_ID>()==4098)
warp_size = 64;
}
if (((p_.local_size_0*p_.local_size_1)%warp_size)>0)

View File

@@ -4,9 +4,7 @@
#include <vector>
#include <cmath>
#include "viennacl/scheduler/forwards.h"
#include "viennacl/tools/tools.hpp"
#include "atidlas/scheduler/forwards.h"
#include "atidlas/backend/templates/template_base.hpp"
namespace atidlas
@@ -25,14 +23,14 @@ public:
class vector_axpy_template : public template_base_impl<vector_axpy_template, vector_axpy_parameters>
{
private:
virtual int check_invalid_impl(viennacl::ocl::device const &, statements_container const &) const
virtual int check_invalid_impl(cl::Device const &, scheduler::statements_container const &) const
{
if (p_.fetching_policy==FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
return TEMPLATE_VALID;
}
std::vector<std::string> generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector<mapping_type> const & mappings) const
std::vector<std::string> generate_impl(std::string const & kernel_prefix, scheduler::statements_container const & statements, std::vector<mapping_type> const & mappings) const
{
std::vector<std::string> result;
for (unsigned int i = 0; i < 2; ++i)
@@ -95,28 +93,28 @@ public:
void up_to_internal_size(bool v)
{ up_to_internal_size_ = v; }
std::vector<atidlas_int_t> input_sizes(statements_container const & statements)
std::vector<atidlas_int_t> input_sizes(scheduler::statements_container const & statements)
{
viennacl::scheduler::statement const & statement = statements.data().front();
scheduler::statement const & statement = statements.data().front();
atidlas_int_t size = vector_size(lhs_most(statement.array(), statement.root()), up_to_internal_size_);
return tools::make_vector<atidlas_int_t>() << size;
}
/** @brief Picks the fallback or the optimized kernel, sets its arguments and enqueues it
 *
 * NOTE(review): two signatures and two bodies are present below. The live body still
 * drives a viennacl::ocl::kernel; the commented-out body is the cl::Kernel port, which is
 * not finished. Until one of them is removed this method cannot be considered migrated.
 */
void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, statements_container const & statements)
void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, scheduler::statements_container const & statements)
{
atidlas_int_t size = input_sizes(statements)[0];
// Kernel names: prefix + '0' is the fallback kernel, prefix + '1' the optimized one.
std::string kfallback = kernel_prefix;
kfallback+='0';
std::string kopt = kernel_prefix;
kopt+='1';
// The fallback is only needed for SIMD widths above 1, when the data is strided,
// when the size is not a multiple of the SIMD width, or when an offset is misaligned.
bool fallback = p_.simd_width > 1 && (has_strided_access(statements) || (size%p_.simd_width>0) || has_misaligned_offset(statements));
// programs[0] holds the fallback kernel, programs[1] the optimized one.
viennacl::ocl::kernel * kernel = &programs[fallback?0:1].program().get_kernel(fallback?kfallback:kopt);
kernel->local_work_size(0, p_.local_size_0);
kernel->global_work_size(0, p_.local_size_0*p_.num_groups);
// The size is always the first argument; the operands of the statements follow.
unsigned int current_arg = 0;
kernel->arg(current_arg++, static_cast<cl_uint>(size));
set_arguments(statements, *kernel, current_arg);
viennacl::ocl::enqueue(*kernel);
// NOTE(review): draft of the cl::Kernel port. It still calls local_work_size(), global_work_size(),
// arg() and get_kernel() as in the ViennaCL version, and passes a pointer with no ranges to
// enqueueNDRangeKernel() on a default-constructed queue — presumably all of these need rewriting; verify against the OpenCL C++ wrapper.
// atidlas_int_t size = input_sizes(statements)[0];
// std::string kfallback = kernel_prefix;
// kfallback+='0';
// std::string kopt = kernel_prefix;
// kopt+='1';
// bool fallback = p_.simd_width > 1 && (has_strided_access(statements) || (size%p_.simd_width>0) || has_misaligned_offset(statements));
// cl::Kernel * kernel = &programs[fallback?0:1].program().get_kernel(fallback?kfallback:kopt);
// kernel->local_work_size(0, p_.local_size_0);
// kernel->global_work_size(0, p_.local_size_0*p_.num_groups);
// unsigned int current_arg = 0;
// kernel->arg(current_arg++, static_cast<cl_uint>(size));
// set_arguments(statements, *kernel, current_arg);
// cl::CommandQueue().enqueueNDRangeKernel(kernel);
}
private:

View File

@@ -4,377 +4,56 @@
#include <sstream>
#include "viennacl/matrix_def.hpp"
#include "viennacl/vector_def.hpp"
#include "viennacl/ocl/forwards.h"
#include "viennacl/scheduler/forwards.h"
#include "viennacl/traits/size.hpp"
#include "viennacl/traits/handle.hpp"
#include "atidlas/tools/to_string.hpp"
#include "atidlas/forwards.h"
#include "atidlas/backend/forwards.h"
#include "atidlas/scheduler/forwards.h"
#include "atidlas/tools/find_and_replace.hpp"
namespace atidlas
{
namespace tools
{
/** @brief Returns the median of a set of values
 *
 * The vector is taken by value because it is sorted in place.
 *
 * @param x values to examine
 * @return the middle element for an odd count, the mean of the two middle
 *         elements (computed in T, hence truncated for integer types) for an
 *         even count, and a value-initialized T for an empty input
 */
template<class T>
T median(std::vector<T> x)
{
  size_t size = x.size();
  // An empty input has no middle element; indexing below would be out of bounds.
  if (size == 0)
    return T();
  std::sort(x.begin(), x.end());
  if (size % 2 == 0)
    return (x[size / 2 - 1] + x[size / 2]) / 2;
  else
    return x[size / 2];
}
/** @brief Inline builder for std::vector<T>
 *
 * Elements are appended with operator<< and the result converts implicitly
 * to std::vector<T>, e.g. `std::vector<int> v = make_vector<int>() << 1 << 2;`
 */
template <typename T>
class make_vector
{
  std::vector<T> data_;

public:
  typedef make_vector<T> my_type;

  /** @brief Appends one element and returns the builder so that calls can be chained */
  my_type& operator<< (const T& element)
  {
    data_.insert(data_.end(), element);
    return *this;
  }

  /** @brief Yields a copy of the elements accumulated so far */
  operator std::vector<T>() const
  {
    std::vector<T> copy(data_.begin(), data_.end());
    return copy;
  }
};
//CUDA Conversion
/** @brief Rewrites OpenCL C kernel source into CUDA source by plain textual substitution
*
* Works on a copy of opencl_src and returns that copy. The substitutions are applied
* strictly in table order, which matters: the pointer forms ("__global float*", ...)
* have to be consumed before the bare "__global" qualifier is rewritten, and "__global"
* has to be rewritten before "__kernel" introduces the text "__global__".
* Only dimensions 0 and 1 of the work-item index functions are translated.
*
* @param opencl_src  OpenCL source text
* @return the source text after all substitutions
*/
inline std::string opencl_source_to_cuda_source(std::string const & opencl_src)
{
  // {pattern, replacement}, in the order they must be applied
  static const char * const substitutions[][2] =
  {
    {"__attribute__", "//__attribute__"},

    //Pointer
    {"__global float*", "float*"},
    {"__local float*", "float*"},
    {"__global double*", "double*"},
    {"__local double*", "double*"},

    //Qualifiers
    {"__global", "__device__"},
    {"__kernel", "__global__"},
    {"__constant", "__constant__"},
    {"__local", "__shared__"},

    //Indexing
    {"get_num_groups(0)", "gridDim.x"},
    {"get_num_groups(1)", "gridDim.y"},
    {"get_local_size(0)", "blockDim.x"},
    {"get_local_size(1)", "blockDim.y"},
    {"get_group_id(0)", "blockIdx.x"},
    {"get_group_id(1)", "blockIdx.y"},
    {"get_local_id(0)", "threadIdx.x"},
    {"get_local_id(1)", "threadIdx.y"},
    {"get_global_id(0)", "(blockIdx.x*blockDim.x + threadIdx.x)"},
    {"get_global_id(1)", "(blockIdx.y*blockDim.y + threadIdx.y)"},

    //Synchronization
    {"barrier(CLK_LOCAL_MEM_FENCE)", "__syncthreads()"},
    {"barrier(CLK_GLOBAL_MEM_FENCE)", "__syncthreads()"}
  };
  static const unsigned int num_substitutions = sizeof(substitutions) / sizeof(substitutions[0]);

  std::string cuda_src = opencl_src;
  for (unsigned int i = 0; i < num_substitutions; ++i)
    viennacl::tools::find_and_replace(cuda_src, substitutions[i][0], substitutions[i][1]);
  return cuda_src;
}
/** @brief Returns the type-name string associated with a numeric type enumerator
*
* Throws generator_not_supported_exception for any enumerator without an active case
* (the char/uchar/short/ushort cases are commented out).
* NOTE(review): the switch carries two label sets -- viennacl::scheduler-qualified labels returning
* "unsigned int"/"unsigned long" and unqualified labels returning "uint"/"ulong". This looks like a
* before/after pair of the same table; confirm which set is meant to be live.
*/
static std::string numeric_type_to_string(viennacl::scheduler::statement_node_numeric_type const & type){
switch (type)
{
//case viennacl::scheduler::CHAR_TYPE: return "char";
//case viennacl::scheduler::UCHAR_TYPE: return "unsigned char";
//case viennacl::scheduler::SHORT_TYPE: return "short";
//case viennacl::scheduler::USHORT_TYPE: return "unsigned short";
case viennacl::scheduler::INT_TYPE: return "int";
case viennacl::scheduler::UINT_TYPE: return "unsigned int";
case viennacl::scheduler::LONG_TYPE: return "long";
case viennacl::scheduler::ULONG_TYPE: return "unsigned long";
case viennacl::scheduler::FLOAT_TYPE : return "float";
case viennacl::scheduler::DOUBLE_TYPE : return "double";
//case CHAR_TYPE: return "char";
//case UCHAR_TYPE: return "uchar";
//case SHORT_TYPE: return "short";
//case USHORT_TYPE: return "ushort";
case INT_TYPE: return "int";
case UINT_TYPE: return "uint";
case LONG_TYPE: return "long";
case ULONG_TYPE: return "ulong";
case FLOAT_TYPE : return "float";
case DOUBLE_TYPE : return "double";
// Anything not listed above is rejected
default : throw generator_not_supported_exception("Unsupported Scalartype");
}
}
/** @brief Invokes fun on the host_* value stored in element
*
* The member handed to fun is selected by element.numeric_type (int, uint, long, ulong, float or double).
* The char/uchar/short/ushort variants are not dispatched and end up in the throwing path.
*
* @param element  operand descriptor; asserted to belong to SCALAR_TYPE_FAMILY
* @param fun      functor exposing a nested result_type, callable on each supported value type
* @return the result of fun for the selected member
* @throws generator_not_supported_exception for every other numeric type
*/
template<class Fun>
static typename Fun::result_type call_on_host_scalar(viennacl::scheduler::lhs_rhs_element element, Fun const & fun)
{
  namespace vsched = viennacl::scheduler;
  assert(element.type_family == viennacl::scheduler::SCALAR_TYPE_FAMILY && bool("Must be called on a host scalar"));

  switch (element.numeric_type)
  {
    case vsched::INT_TYPE:    return fun(element.host_int);
    case vsched::UINT_TYPE:   return fun(element.host_uint);
    case vsched::LONG_TYPE:   return fun(element.host_long);
    case vsched::ULONG_TYPE:  return fun(element.host_ulong);
    case vsched::FLOAT_TYPE:  return fun(element.host_float);
    case vsched::DOUBLE_TYPE: return fun(element.host_double);
    default: break;
  }
  throw generator_not_supported_exception("Unsupported Scalartype");
}
/** @brief Invokes fun on the scalar object that element points to
*
* The scalar_* pointer that gets dereferenced is selected by element.numeric_type
* (int, uint, long, ulong, float or double). The char/uchar/short/ushort variants are not dispatched.
*
* @param element  operand descriptor; asserted to belong to SCALAR_TYPE_FAMILY
* @param fun      functor exposing a nested result_type, callable on each supported scalar type
* @return the result of fun for the selected object
* @throws generator_not_supported_exception for every other numeric type
*/
template<class Fun>
static typename Fun::result_type call_on_scalar(viennacl::scheduler::lhs_rhs_element element, Fun const & fun)
{
  namespace vsched = viennacl::scheduler;
  assert(element.type_family == viennacl::scheduler::SCALAR_TYPE_FAMILY && bool("Must be called on a scalar"));

  switch (element.numeric_type)
  {
    case vsched::INT_TYPE:    return fun(*element.scalar_int);
    case vsched::UINT_TYPE:   return fun(*element.scalar_uint);
    case vsched::LONG_TYPE:   return fun(*element.scalar_long);
    case vsched::ULONG_TYPE:  return fun(*element.scalar_ulong);
    case vsched::FLOAT_TYPE:  return fun(*element.scalar_float);
    case vsched::DOUBLE_TYPE: return fun(*element.scalar_double);
    default: break;
  }
  throw generator_not_supported_exception("Unsupported Scalartype");
}
/** @brief Invokes fun on the vector object that element points to
*
* The vector_* pointer that gets dereferenced is selected by element.numeric_type
* (int, uint, long, ulong, float or double). The char/uchar/short/ushort variants are not dispatched.
*
* @param element  operand descriptor; asserted to belong to VECTOR_TYPE_FAMILY
* @param fun      functor exposing a nested result_type, callable on each supported vector type
* @return the result of fun for the selected object
* @throws generator_not_supported_exception for every other numeric type
*/
template<class Fun>
static typename Fun::result_type call_on_vector(viennacl::scheduler::lhs_rhs_element element, Fun const & fun)
{
  namespace vsched = viennacl::scheduler;
  assert(element.type_family == viennacl::scheduler::VECTOR_TYPE_FAMILY && bool("Must be called on a vector"));

  switch (element.numeric_type)
  {
    case vsched::INT_TYPE:    return fun(*element.vector_int);
    case vsched::UINT_TYPE:   return fun(*element.vector_uint);
    case vsched::LONG_TYPE:   return fun(*element.vector_long);
    case vsched::ULONG_TYPE:  return fun(*element.vector_ulong);
    case vsched::FLOAT_TYPE:  return fun(*element.vector_float);
    case vsched::DOUBLE_TYPE: return fun(*element.vector_double);
    default: break;
  }
  throw generator_not_supported_exception("Unsupported Scalartype");
}
/** @brief Invokes fun on the implicit vector object that element points to
*
* The implicit_vector_* pointer that gets dereferenced is selected by element.numeric_type
* (int, uint, long, ulong, float or double). The char/uchar/short/ushort variants are not dispatched.
*
* @param element  operand descriptor; asserted to be of VECTOR_TYPE_FAMILY with subtype IMPLICIT_VECTOR_TYPE
* @param fun      functor exposing a nested result_type, callable on each supported implicit vector type
* @return the result of fun for the selected object
* @throws generator_not_supported_exception for every other numeric type
*/
template<class Fun>
static typename Fun::result_type call_on_implicit_vector(viennacl::scheduler::lhs_rhs_element element, Fun const & fun)
{
  namespace vsched = viennacl::scheduler;
  assert(element.type_family == viennacl::scheduler::VECTOR_TYPE_FAMILY && bool("Must be called on a implicit_vector"));
  assert(element.subtype == viennacl::scheduler::IMPLICIT_VECTOR_TYPE && bool("Must be called on a implicit_vector"));

  switch (element.numeric_type)
  {
    case vsched::INT_TYPE:    return fun(*element.implicit_vector_int);
    case vsched::UINT_TYPE:   return fun(*element.implicit_vector_uint);
    case vsched::LONG_TYPE:   return fun(*element.implicit_vector_long);
    case vsched::ULONG_TYPE:  return fun(*element.implicit_vector_ulong);
    case vsched::FLOAT_TYPE:  return fun(*element.implicit_vector_float);
    case vsched::DOUBLE_TYPE: return fun(*element.implicit_vector_double);
    default: break;
  }
  throw generator_not_supported_exception("Unsupported Scalartype");
}
/** @brief Invokes fun on the matrix object that element points to
*
* The matrix_* pointer that gets dereferenced is selected by element.numeric_type
* (int, uint, long, ulong, float or double). The char/uchar/short/ushort variants are not dispatched.
*
* @param element  operand descriptor; asserted to belong to MATRIX_TYPE_FAMILY
* @param fun      functor exposing a nested result_type, callable on each supported matrix type
* @return the result of fun for the selected object
* @throws generator_not_supported_exception for every other numeric type
*/
template<class Fun>
static typename Fun::result_type call_on_matrix(viennacl::scheduler::lhs_rhs_element element, Fun const & fun)
{
  namespace vsched = viennacl::scheduler;
  assert(element.type_family == viennacl::scheduler::MATRIX_TYPE_FAMILY && bool("Must be called on a matrix"));

  switch (element.numeric_type)
  {
    case vsched::INT_TYPE:    return fun(*element.matrix_int);
    case vsched::UINT_TYPE:   return fun(*element.matrix_uint);
    case vsched::LONG_TYPE:   return fun(*element.matrix_long);
    case vsched::ULONG_TYPE:  return fun(*element.matrix_ulong);
    case vsched::FLOAT_TYPE:  return fun(*element.matrix_float);
    case vsched::DOUBLE_TYPE: return fun(*element.matrix_double);
    default: break;
  }
  throw generator_not_supported_exception("Unsupported Scalartype");
}
/** @brief Invokes fun on the implicit matrix object that element points to
*
* The implicit_matrix_* pointer that gets dereferenced is selected by element.numeric_type
* (int, uint, long, ulong, float or double). The char/uchar/short/ushort variants are not dispatched.
*
* @param element  operand descriptor; asserted to have subtype IMPLICIT_MATRIX_TYPE
* @param fun      functor exposing a nested result_type, callable on each supported implicit matrix type
* @return the result of fun for the selected object
* @throws generator_not_supported_exception for every other numeric type
*/
template<class Fun>
static typename Fun::result_type call_on_implicit_matrix(viennacl::scheduler::lhs_rhs_element element, Fun const & fun)
{
  namespace vsched = viennacl::scheduler;
  assert(element.subtype == viennacl::scheduler::IMPLICIT_MATRIX_TYPE && bool("Must be called on a implicit matrix"));

  switch (element.numeric_type)
  {
    case vsched::INT_TYPE:    return fun(*element.implicit_matrix_int);
    case vsched::UINT_TYPE:   return fun(*element.implicit_matrix_uint);
    case vsched::LONG_TYPE:   return fun(*element.implicit_matrix_long);
    case vsched::ULONG_TYPE:  return fun(*element.implicit_matrix_ulong);
    case vsched::FLOAT_TYPE:  return fun(*element.implicit_matrix_float);
    case vsched::DOUBLE_TYPE: return fun(*element.implicit_matrix_double);
    default: break;
  }
  throw generator_not_supported_exception("Unsupported Scalartype");
}
/** @brief Dispatches fun to the call_on_* helper matching the operand's type family and subtype
*
* - SCALAR_TYPE_FAMILY: call_on_host_scalar for HOST_SCALAR_TYPE, call_on_scalar otherwise
* - VECTOR_TYPE_FAMILY: call_on_implicit_vector for IMPLICIT_VECTOR_TYPE, call_on_vector otherwise
* - MATRIX_TYPE_FAMILY: call_on_implicit_matrix for IMPLICIT_MATRIX_TYPE, call_on_matrix otherwise
*
* @param element  operand descriptor to dispatch on
* @param fun      functor exposing a nested result_type
* @return the result of the selected helper
* @throws generator_not_supported_exception when the type family is none of the three above
*/
template<class Fun>
static typename Fun::result_type call_on_element(viennacl::scheduler::lhs_rhs_element const & element, Fun const & fun)
{
  namespace vsched = viennacl::scheduler;

  if (element.type_family == vsched::SCALAR_TYPE_FAMILY)
  {
    if (element.subtype == vsched::HOST_SCALAR_TYPE)
      return call_on_host_scalar(element, fun);
    return call_on_scalar(element, fun);
  }

  if (element.type_family == vsched::VECTOR_TYPE_FAMILY)
  {
    if (element.subtype == vsched::IMPLICIT_VECTOR_TYPE)
      return call_on_implicit_vector(element, fun);
    return call_on_vector(element, fun);
  }

  if (element.type_family == vsched::MATRIX_TYPE_FAMILY)
  {
    if (element.subtype == vsched::IMPLICIT_MATRIX_TYPE)
      return call_on_implicit_matrix(element, fun);
    return call_on_matrix(element,fun);
  }

  throw generator_not_supported_exception("Unsupported datastructure type : Not among {Scalar, Vector, Matrix}");
}
/** @brief Functor returning the size in bytes of the scalar type behind an operand
*
* float and double arguments are answered directly; any other argument type T is
* answered with sizeof(viennacl::result_of::cpu_value_type<T>::type).
*/
struct scalartype_size_fun
{
  typedef atidlas_int_t result_type;

  result_type operator()(float const &) const
  {
    return sizeof(float);
  }

  result_type operator()(double const &) const
  {
    return sizeof(double);
  }

  template<class ObjT>
  result_type operator()(ObjT const &) const
  {
    typedef typename viennacl::result_of::cpu_value_type<ObjT>::type value_type;
    return sizeof(value_type);
  }
};
/** @brief Functor returning viennacl::traits::internal_size() of its argument as atidlas_int_t */
struct internal_size_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::internal_size(obj);
  }
};
/** @brief Functor returning viennacl::traits::size() of its argument as atidlas_int_t */
struct size_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::size(obj);
  }
};
/** @brief Functor returning viennacl::traits::start() of its argument as atidlas_int_t */
struct start_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::start(obj);
  }
};
/** @brief Functor returning viennacl::traits::stride() of its argument as atidlas_int_t */
struct stride_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::stride(obj);
  }
};
/** @brief Functor returning viennacl::traits::start1() of its argument as atidlas_int_t */
struct start1_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::start1(obj);
  }
};
/** @brief Functor returning viennacl::traits::start2() of its argument as atidlas_int_t */
struct start2_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::start2(obj);
  }
};
/** @brief Functor returning the leading stride of its argument, obtained from viennacl::traits::stride1(), as atidlas_int_t */
struct leading_stride_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::stride1(obj);
  }
};
/** @brief Functor returning the leading start offset of its argument, obtained from viennacl::traits::start1(), as atidlas_int_t */
struct leading_start_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::start1(obj);
  }
};
/** @brief Functor returning viennacl::traits::stride1() of its argument as atidlas_int_t */
struct stride1_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::stride1(obj);
  }
};
/** @brief Functor returning viennacl::traits::stride2() of its argument as atidlas_int_t */
struct stride2_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::stride2(obj);
  }
};
/** @brief Functor returning viennacl::traits::opencl_handle() of its argument as a cl_mem */
struct handle_fun
{
  typedef cl_mem result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::opencl_handle(obj);
  }
};
/** @brief Functor returning viennacl::traits::internal_size1() of its argument as atidlas_int_t */
struct internal_size1_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::internal_size1(obj);
  }
};
/** @brief Functor returning viennacl::traits::internal_size2() of its argument as atidlas_int_t */
struct internal_size2_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::internal_size2(obj);
  }
};
/** @brief Functor returning viennacl::traits::size1() of its argument as atidlas_int_t */
struct size1_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::size1(obj);
  }
};
/** @brief Functor returning viennacl::traits::size2() of its argument as atidlas_int_t */
struct size2_fun
{
  typedef atidlas_int_t result_type;

  template<class ObjT>
  result_type operator()(ObjT const & obj) const
  {
    return viennacl::traits::size2(obj);
  }
};
/** @brief Compile-time type comparison: value is 1 when both template arguments name the same type, 0 otherwise */
template<class T, class U>
struct is_same_type
{
  enum { value = 0 };
};

/** @brief Partial specialization selected when both template arguments are identical */
template<class T>
struct is_same_type<T, T>
{
  enum { value = 1 };
};
/** @brief Tells whether a statement node performs a reduction
*
* True when the node's operation family is one of the vector / columns / rows reduction families,
* or when the operation type is an inner product or a matrix-vector product.
* NOTE(review): the signature and the return statement each appear twice (viennacl::scheduler- and
* scheduler-qualified); looks like a before/after pair -- confirm which one is live.
*/
inline bool is_reduction(viennacl::scheduler::statement_node const & node)
inline bool is_reduction(scheduler::statement_node const & node)
{
return node.op.type_family==viennacl::scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY
|| node.op.type_family==viennacl::scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY
|| node.op.type_family==viennacl::scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY
|| node.op.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE
|| node.op.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE;
return node.op.type_family==scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY
|| node.op.type_family==scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY
|| node.op.type_family==scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY
|| node.op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE
|| node.op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE;
}
/** @brief Tells whether an operation is one of the arg-min / arg-max element operations
*
* True when op.type is OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE, _ARGMAX_TYPE, _ARGFMIN_TYPE or _ARGMIN_TYPE.
* NOTE(review): the signature and the return statement each appear twice (viennacl::scheduler- and
* scheduler-qualified); looks like a before/after pair -- confirm which one is live.
*/
inline bool is_index_reduction(viennacl::scheduler::op_element const & op)
inline bool is_index_reduction(scheduler::op_element const & op)
{
return op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE
|| op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE
|| op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE
|| op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE;
return op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE
|| op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE
|| op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE
|| op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE;
}
template<class T>
struct type_to_string;
@@ -434,9 +113,9 @@ private:
std::ostringstream oss;
};
inline bool node_leaf(viennacl::scheduler::op_element const & op)
inline bool node_leaf(scheduler::op_element const & op)
{
using namespace viennacl::scheduler;
using namespace scheduler;
return op.type==OPERATION_UNARY_NORM_1_TYPE
|| op.type==OPERATION_UNARY_NORM_2_TYPE
|| op.type==OPERATION_UNARY_NORM_INF_TYPE
@@ -453,9 +132,9 @@ inline bool node_leaf(viennacl::scheduler::op_element const & op)
|| op.type_family==OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY;
}
inline bool elementwise_operator(viennacl::scheduler::op_element const & op)
inline bool elementwise_operator(scheduler::op_element const & op)
{
using namespace viennacl::scheduler;
using namespace scheduler;
return op.type== OPERATION_BINARY_ASSIGN_TYPE
|| op.type== OPERATION_BINARY_INPLACE_ADD_TYPE
|| op.type== OPERATION_BINARY_INPLACE_SUB_TYPE
@@ -467,9 +146,9 @@ inline bool elementwise_operator(viennacl::scheduler::op_element const & op)
|| op.type== OPERATION_BINARY_DIV_TYPE;
}
inline bool elementwise_function(viennacl::scheduler::op_element const & op)
inline bool elementwise_function(scheduler::op_element const & op)
{
using namespace viennacl::scheduler;
using namespace scheduler;
return
op.type == OPERATION_UNARY_CAST_CHAR_TYPE
@@ -516,37 +195,13 @@ inline bool elementwise_function(viennacl::scheduler::op_element const & op)
}
/** @brief Returns a mutable reference to the lhs or rhs operand of node idx in statement st
*
* leaf selects the operand and must be LHS_NODE_TYPE or RHS_NODE_TYPE (asserted).
* The statement is received by const reference and the constness of the stored operand is cast away,
* so the caller can modify the operand through the returned reference.
* NOTE(review): two signatures appear back to back (viennacl::scheduler- and scheduler-qualified);
* looks like a before/after pair -- confirm which one is live.
*/
inline viennacl::scheduler::lhs_rhs_element & lhs_rhs_element(viennacl::scheduler::statement const & st, atidlas_int_t idx, leaf_t leaf)
inline scheduler::lhs_rhs_element & lhs_rhs_element(scheduler::statement const & st, atidlas_int_t idx, leaf_t leaf)
{
using namespace tools;
assert(leaf==LHS_NODE_TYPE || leaf==RHS_NODE_TYPE);
if (leaf==LHS_NODE_TYPE)
return const_cast<viennacl::scheduler::lhs_rhs_element &>(st.array()[idx].lhs);
// Any other leaf value falls through to the rhs operand
return const_cast<viennacl::scheduler::lhs_rhs_element &>(st.array()[idx].rhs);
}
/** @brief Returns the size associated with a numeric type enumerator
*
* 1 for char/uchar, 2 for short/ushort/half, 4 for int/uint/float, 8 for long/ulong/double.
* Throws generator_not_supported_exception for any other enumerator.
*/
inline unsigned int size_of(viennacl::scheduler::statement_node_numeric_type type)
{
using namespace viennacl::scheduler;
switch (type)
{
case UCHAR_TYPE:
case CHAR_TYPE: return 1;
case USHORT_TYPE:
case SHORT_TYPE:
case HALF_TYPE: return 2;
case UINT_TYPE:
case INT_TYPE:
case FLOAT_TYPE: return 4;
case ULONG_TYPE:
case LONG_TYPE:
case DOUBLE_TYPE: return 8;
default: throw generator_not_supported_exception("Unsupported scalartype");
}
// NOTE(review): the two returns below use st and idx, which are not parameters of size_of, and every
// switch path above already returns or throws -- they look like lines belonging to lhs_rhs_element; confirm.
return const_cast<scheduler::lhs_rhs_element &>(st.array()[idx].lhs);
return const_cast<scheduler::lhs_rhs_element &>(st.array()[idx].rhs);
}
inline std::string append_width(std::string const & str, unsigned int width)
@@ -584,6 +239,20 @@ private:
typedef create_map<std::multimap<std::string, std::string> > create_process_accessors;
typedef create_map<std::map<std::string, std::string> > create_evaluate_accessors;
/** @brief Helper for building a std::vector<T> within a single expression
*
* Values are appended through chained operator<< calls and the builder converts
* implicitly to std::vector<T>, e.g.
*   std::vector<int> v = make_vector<int>() << 1 << 2 << 3;
*/
template <typename T>
class make_vector
{
public:
  typedef make_vector<T> my_type;

  /** @brief Appends a copy of val and returns the builder itself so that calls can be chained */
  my_type & operator<<(T const & val)
  {
    values_.push_back(val);
    return *this;
  }

  /** @brief Returns a copy of the values appended so far, in insertion order */
  operator std::vector<T>() const
  {
    return values_;
  }

private:
  std::vector<T> values_;
};
}
}

View File

@@ -5,9 +5,7 @@
#include <set>
#include "CL/cl.h"
#include "viennacl/forwards.h"
#include "viennacl/scheduler/forwards.h"
#include "atidlas/scheduler/forwards.h"
#include "atidlas/backend/mapped_objects.hpp"
#include "atidlas/backend/tools/misc.hpp"
#include "atidlas/forwards.h"
@@ -22,15 +20,15 @@ namespace tools
/** @brief Base class supplying empty call_before_expansion / call_after_expansion hooks
*
* filter_fun and filter_elements_fun derive from it, so a functor only has to define the hooks it needs.
* NOTE(review): each hook is declared twice (viennacl::scheduler- and scheduler-qualified statement);
* looks like a before/after pair -- confirm which one is live.
*/
class traversal_functor
{
public:
// Hooks taking a viennacl::scheduler::statement; both do nothing
void call_before_expansion(viennacl::scheduler::statement const &, atidlas_int_t) const { }
void call_after_expansion(viennacl::scheduler::statement const &, atidlas_int_t) const { }
// Hooks taking a scheduler::statement; both do nothing
void call_before_expansion(scheduler::statement const &, atidlas_int_t) const { }
void call_after_expansion(scheduler::statement const &, atidlas_int_t) const { }
};
/** @brief Recursively execute a functor on a statement */
template<class Fun>
inline void traverse(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, Fun const & fun, bool inspect)
inline void traverse(scheduler::statement const & statement, atidlas_int_t root_idx, Fun const & fun, bool inspect)
{
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
scheduler::statement_node const & root_node = statement.array()[root_idx];
bool recurse = tools::node_leaf(root_node.op)?inspect:true;
fun.call_before_expansion(statement, root_idx);
@@ -38,9 +36,9 @@ inline void traverse(viennacl::scheduler::statement const & statement, atidlas_i
//Lhs:
if (recurse)
{
if (root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
traverse(statement, root_node.lhs.node_index, fun, inspect);
if (root_node.lhs.type_family != viennacl::scheduler::INVALID_TYPE_FAMILY)
if (root_node.lhs.type_family != scheduler::INVALID_TYPE_FAMILY)
fun(statement, root_idx, LHS_NODE_TYPE);
}
@@ -48,11 +46,11 @@ inline void traverse(viennacl::scheduler::statement const & statement, atidlas_i
fun(statement, root_idx, PARENT_NODE_TYPE);
//Rhs:
if (recurse && root_node.rhs.type_family!=viennacl::scheduler::INVALID_TYPE_FAMILY)
if (recurse && root_node.rhs.type_family!=scheduler::INVALID_TYPE_FAMILY)
{
if (root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
traverse(statement, root_node.rhs.node_index, fun, inspect);
if (root_node.rhs.type_family != viennacl::scheduler::INVALID_TYPE_FAMILY)
if (root_node.rhs.type_family != scheduler::INVALID_TYPE_FAMILY)
fun(statement, root_idx, RHS_NODE_TYPE);
}
@@ -62,13 +60,13 @@ inline void traverse(viennacl::scheduler::statement const & statement, atidlas_i
class filter_fun : public traversal_functor
{
public:
typedef bool (*pred_t)(viennacl::scheduler::statement_node const & node);
typedef bool (*pred_t)(scheduler::statement_node const & node);
filter_fun(pred_t pred, std::vector<size_t> & out) : pred_(pred), out_(out){ }
void operator()(viennacl::scheduler::statement const & statement, size_t root_idx, leaf_t) const
void operator()(scheduler::statement const & statement, size_t root_idx, leaf_t) const
{
viennacl::scheduler::statement_node const * root_node = &statement.array()[root_idx];
scheduler::statement_node const * root_node = &statement.array()[root_idx];
if (pred_(*root_node))
out_.push_back(root_idx);
}
@@ -77,7 +75,7 @@ private:
std::vector<size_t> & out_;
};
inline std::vector<size_t> filter_nodes(bool (*pred)(viennacl::scheduler::statement_node const & node), viennacl::scheduler::statement const & statement, bool inspect)
inline std::vector<size_t> filter_nodes(bool (*pred)(scheduler::statement_node const & node), scheduler::statement const & statement, bool inspect)
{
std::vector<size_t> res;
tools::traverse(statement, statement.root(), filter_fun(pred, res), inspect);
@@ -87,32 +85,32 @@ inline std::vector<size_t> filter_nodes(bool (*pred)(viennacl::scheduler::statem
/** @brief Traversal functor collecting every lhs/rhs operand whose subtype equals a requested subtype
*
* Matches are appended to the vector passed to the constructor, which is held by reference.
* NOTE(review): constructor, operator() header, the root_node declaration and the data members each
* appear twice (viennacl::scheduler- and scheduler-qualified); looks like a before/after pair -- confirm which one is live.
*/
class filter_elements_fun : public traversal_functor
{
public:
/* Stores the subtype to look for and a reference to the output vector */
filter_elements_fun(viennacl::scheduler::statement_node_subtype subtype, std::vector<viennacl::scheduler::lhs_rhs_element> & out) : subtype_(subtype), out_(out) { }
filter_elements_fun(scheduler::statement_node_subtype subtype, std::vector<scheduler::lhs_rhs_element> & out) : subtype_(subtype), out_(out) { }
/* Inspects node root_idx of the statement; the leaf argument is ignored */
void operator()(viennacl::scheduler::statement const & statement, size_t root_idx, leaf_t) const
void operator()(scheduler::statement const & statement, size_t root_idx, leaf_t) const
{
viennacl::scheduler::statement_node const * root_node = &statement.array()[root_idx];
scheduler::statement_node const * root_node = &statement.array()[root_idx];
// lhs and rhs are tested independently, so a node can contribute up to two operands per call
if (root_node->lhs.subtype==subtype_)
out_.push_back(root_node->lhs);
if (root_node->rhs.subtype==subtype_)
out_.push_back(root_node->rhs);
}
private:
viennacl::scheduler::statement_node_subtype subtype_;
std::vector<viennacl::scheduler::lhs_rhs_element> & out_;
scheduler::statement_node_subtype subtype_;
std::vector<scheduler::lhs_rhs_element> & out_;
};
/** @brief Returns the operands of a statement whose subtype equals subtype
*
* Runs tools::traverse from statement.root() with a filter_elements_fun and inspect set to true,
* and returns the operands that functor collected.
* NOTE(review): the signature and the result declaration each appear twice (viennacl::scheduler- and
* scheduler-qualified); looks like a before/after pair -- confirm which one is live.
*/
inline std::vector<viennacl::scheduler::lhs_rhs_element> filter_elements(viennacl::scheduler::statement_node_subtype subtype, viennacl::scheduler::statement const & statement)
inline std::vector<scheduler::lhs_rhs_element> filter_elements(scheduler::statement_node_subtype subtype, scheduler::statement const & statement)
{
std::vector<viennacl::scheduler::lhs_rhs_element> res;
std::vector<scheduler::lhs_rhs_element> res;
tools::traverse(statement, statement.root(), filter_elements_fun(subtype, res), true);
return res;
}
/** @brief generate a string from an operation_node_type */
inline const char * evaluate(viennacl::scheduler::operation_node_type type)
inline const char * evaluate(scheduler::operation_node_type type)
{
using namespace viennacl::scheduler;
using namespace scheduler;
// unary expression
switch (type)
{
@@ -194,9 +192,9 @@ inline const char * evaluate(viennacl::scheduler::operation_node_type type)
}
}
inline const char * operator_string(viennacl::scheduler::operation_node_type type)
inline const char * operator_string(scheduler::operation_node_type type)
{
using namespace viennacl::scheduler;
using namespace scheduler;
switch (type)
{
case OPERATION_UNARY_CAST_CHAR_TYPE : return "char";
@@ -237,24 +235,24 @@ private:
public:
/** @brief Initializes accessors_, str_ and mapping_ from the corresponding arguments */
evaluate_expression_traversal(std::map<std::string, std::string> const & accessors,
                              std::string & str,
                              mapping_type const & mapping)
  : accessors_(accessors),
    str_(str),
    mapping_(mapping)
{
}
/** @brief Appends to str_ the text that opens the expression for node root_idx
*
* When the node's operation is of the unary family or an elementwise function, and is not a node_leaf,
* the string from tools::evaluate(op.type) is appended first; an opening parenthesis is appended in every case.
* NOTE(review): the signature, the root_node declaration and the first line of the condition each appear twice
* (viennacl::scheduler- and scheduler-qualified); looks like a before/after pair -- confirm which one is live.
*/
void call_before_expansion(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx) const
void call_before_expansion(scheduler::statement const & statement, atidlas_int_t root_idx) const
{
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
if ((root_node.op.type_family==viennacl::scheduler::OPERATION_UNARY_TYPE_FAMILY || tools::elementwise_function(root_node.op))
scheduler::statement_node const & root_node = statement.array()[root_idx];
if ((root_node.op.type_family==scheduler::OPERATION_UNARY_TYPE_FAMILY || tools::elementwise_function(root_node.op))
&& !tools::node_leaf(root_node.op))
str_+=tools::evaluate(root_node.op.type);
// The parenthesis is not part of the if above: it is emitted unconditionally
str_+="(";
}
/** @brief Appends a closing parenthesis to str_; both arguments are unused
*
* NOTE(review): the signature appears twice (viennacl::scheduler- and scheduler-qualified);
* looks like a before/after pair -- confirm which one is live.
*/
void call_after_expansion(viennacl::scheduler::statement const & /*statement*/, atidlas_int_t /*root_idx*/) const
void call_after_expansion(scheduler::statement const & /*statement*/, atidlas_int_t /*root_idx*/) const
{
str_+=")";
}
void operator()(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf) const
void operator()(scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf) const
{
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
scheduler::statement_node const & root_node = statement.array()[root_idx];
mapping_type::key_type key = std::make_pair(root_idx, leaf);
if (leaf==PARENT_NODE_TYPE)
{
@@ -262,20 +260,20 @@ public:
str_ += mapping_.at(key)->evaluate(accessors_);
else if (tools::elementwise_operator(root_node.op))
str_ += tools::evaluate(root_node.op.type);
else if (root_node.op.type_family!=viennacl::scheduler::OPERATION_UNARY_TYPE_FAMILY && tools::elementwise_function(root_node.op))
else if (root_node.op.type_family!=scheduler::OPERATION_UNARY_TYPE_FAMILY && tools::elementwise_function(root_node.op))
str_ += ",";
}
else
{
if (leaf==LHS_NODE_TYPE)
{
if (root_node.lhs.type_family!=viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
if (root_node.lhs.type_family!=scheduler::COMPOSITE_OPERATION_FAMILY)
str_ += mapping_.at(key)->evaluate(accessors_);
}
if (leaf==RHS_NODE_TYPE)
{
if (root_node.rhs.type_family!=viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
if (root_node.rhs.type_family!=scheduler::COMPOSITE_OPERATION_FAMILY)
str_ += mapping_.at(key)->evaluate(accessors_);
}
}
@@ -283,22 +281,22 @@ public:
};
inline std::string evaluate(leaf_t leaf, std::map<std::string, std::string> const & accessors,
viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, mapping_type const & mapping)
scheduler::statement const & statement, atidlas_int_t root_idx, mapping_type const & mapping)
{
std::string res;
evaluate_expression_traversal traversal_functor(accessors, res, mapping);
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
scheduler::statement_node const & root_node = statement.array()[root_idx];
if (leaf==RHS_NODE_TYPE)
{
if (root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
tools::traverse(statement, root_node.rhs.node_index, traversal_functor, false);
else
traversal_functor(statement, root_idx, leaf);
}
else if (leaf==LHS_NODE_TYPE)
{
if (root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
tools::traverse(statement, root_node.lhs.node_index, traversal_functor, false);
else
traversal_functor(statement, root_idx, leaf);
@@ -310,9 +308,9 @@ inline std::string evaluate(leaf_t leaf, std::map<std::string, std::string> cons
}
inline void evaluate(tools::kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors,
statements_container const & statements, std::vector<mapping_type> const & mappings)
scheduler::statements_container const & statements, std::vector<mapping_type> const & mappings)
{
statements_container::data_type::const_iterator sit;
scheduler::statements_container::data_type::const_iterator sit;
std::vector<mapping_type>::const_iterator mit;
for (mit = mappings.begin(), sit = statements.data().begin(); sit != statements.data().end(); ++mit, ++sit)
@@ -327,7 +325,7 @@ public:
/** @brief Initializes accessors_, stream_, mapping_ and already_processed_ from the corresponding arguments */
process_traversal(std::multimap<std::string, std::string> const & accessors,
                  tools::kernel_generation_stream & stream,
                  mapping_type const & mapping,
                  std::set<std::string> & already_processed)
  : accessors_(accessors),
    stream_(stream),
    mapping_(mapping),
    already_processed_(already_processed)
{
}
void operator()(viennacl::scheduler::statement const & /*statement*/, atidlas_int_t root_idx, leaf_t leaf) const
void operator()(scheduler::statement const & /*statement*/, atidlas_int_t root_idx, leaf_t leaf) const
{
mapping_type::const_iterator it = mapping_.find(std::make_pair(root_idx, leaf));
if (it!=mapping_.end())
@@ -353,21 +351,21 @@ private:
};
inline void process(tools::kernel_generation_stream & stream, leaf_t leaf, std::multimap<std::string, std::string> const & accessors,
viennacl::scheduler::statement const & statement, size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed)
scheduler::statement const & statement, size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed)
{
process_traversal traversal_functor(accessors, stream, mapping, already_processed);
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
scheduler::statement_node const & root_node = statement.array()[root_idx];
if (leaf==RHS_NODE_TYPE)
{
if (root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
tools::traverse(statement, root_node.rhs.node_index, traversal_functor, true);
else
traversal_functor(statement, root_idx, leaf);
}
else if (leaf==LHS_NODE_TYPE)
{
if (root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
tools::traverse(statement, root_node.lhs.node_index, traversal_functor, true);
else
traversal_functor(statement, root_idx, leaf);
@@ -379,9 +377,9 @@ inline void process(tools::kernel_generation_stream & stream, leaf_t leaf, std::
}
inline void process(tools::kernel_generation_stream & stream, leaf_t leaf, std::multimap<std::string, std::string> const & accessors,
statements_container const & statements, std::vector<mapping_type> const & mappings)
scheduler::statements_container const & statements, std::vector<mapping_type> const & mappings)
{
statements_container::data_type::const_iterator sit;
scheduler::statements_container::data_type::const_iterator sit;
std::vector<mapping_type>::const_iterator mit;
std::set<std::string> already_processed;
@@ -404,78 +402,83 @@ private:
}
}
public:
typedef void result_type;
/** @brief Vector mapping
*
* Writes the tag 'v', then vec.dtype() converted to char, then the identifier obtained from
* binder_.get(&vec.data()) through append_id. ptr_ is advanced past everything written.
*/
inline void append(atidlas::vector_base const & vec) const
{
  *ptr_ = 'v'; //vector
  ++ptr_;
  *ptr_ = static_cast<char>(vec.dtype());
  ++ptr_;
  append_id(ptr_, binder_.get(&vec.data()));
}
/** @brief Appends the representation of an operand
*
* Only operands of subtype DENSE_VECTOR_TYPE are handled (forwarded to the vector overload);
* every other subtype leaves the output untouched.
*/
inline void append(scheduler::lhs_rhs_element const & lhs_rhs) const
{
  if (lhs_rhs.subtype != scheduler::DENSE_VECTOR_TYPE)
    return;
  append(*lhs_rhs.vector);
}
public:
/** @brief Initializes binder_ and ptr_ from the corresponding arguments */
statement_representation_functor(symbolic_binder & binder, char *& ptr)
  : binder_(binder),
    ptr_(ptr)
{
}
/** @brief Host scalar mapping: writes 'h', 's' and the first letter of NumericT; the argument itself is unused */
template<class NumericT>
inline result_type operator()(NumericT const & /*scal*/) const
{
  *ptr_ = 'h'; //host
  ++ptr_;
  *ptr_ = 's'; //scalar
  ++ptr_;
  *ptr_ = tools::first_letter_of_type<NumericT>::value();
  ++ptr_;
}
// template<class NumericT>
// inline result_type operator()(NumericT const & /*scal*/) const
// {
// *ptr_++='h'; //host
// *ptr_++='s'; //scalar
// *ptr_++=tools::first_letter_of_type<NumericT>::value();
// }
/** @brief Scalar mapping */
template<class NumericT>
inline result_type operator()(viennacl::scalar<NumericT> const & scal) const
{
*ptr_++='s'; //scalar
*ptr_++=tools::first_letter_of_type<NumericT>::value();
append_id(ptr_, binder_.get(&viennacl::traits::handle(scal)));
}
// /** @brief Scalar mapping */
// template<class NumericT>
// inline result_type operator()(viennacl::scalar<NumericT> const & scal) const
// {
// *ptr_++='s'; //scalar
// *ptr_++=tools::first_letter_of_type<NumericT>::value();
// append_id(ptr_, binder_.get(&viennacl::traits::handle(scal)));
// }
/** @brief Vector mapping */
template<class NumericT>
inline result_type operator()(viennacl::vector_base<NumericT> const & vec) const
{
*ptr_++='v'; //vector
*ptr_++=tools::first_letter_of_type<NumericT>::value();
append_id(ptr_, binder_.get(&viennacl::traits::handle(vec)));
}
/** @brief Implicit vector mapping */
template<class NumericT>
inline result_type operator()(viennacl::implicit_vector_base<NumericT> const & /*vec*/) const
{
*ptr_++='i'; //implicit
*ptr_++='v'; //vector
*ptr_++=tools::first_letter_of_type<NumericT>::value();
}
/** @brief Matrix mapping */
template<class NumericT>
inline result_type operator()(viennacl::matrix_base<NumericT> const & mat) const
{
*ptr_++='m'; //Matrix
*ptr_++=tools::first_letter_of_type<NumericT>::value();
append_id(ptr_, binder_.get(&viennacl::traits::handle(mat)));
}
// /** @brief Implicit vector mapping */
// template<class NumericT>
// inline result_type operator()(viennacl::implicit_vector_base<NumericT> const & /*vec*/) const
// {
// *ptr_++='i'; //implicit
// *ptr_++='v'; //vector
// *ptr_++=tools::first_letter_of_type<NumericT>::value();
// }
/** @brief Implicit matrix mapping */
template<class NumericT>
inline result_type operator()(viennacl::implicit_matrix_base<NumericT> const & /*mat*/) const
{
*ptr_++='i'; //implicit
*ptr_++='m'; //matrix
*ptr_++=tools::first_letter_of_type<NumericT>::value();
}
// /** @brief Matrix mapping */
// template<class NumericT>
// inline result_type operator()(viennacl::matrix_base<NumericT> const & mat) const
// {
// *ptr_++='m'; //Matrix
// *ptr_++=tools::first_letter_of_type<NumericT>::value();
// append_id(ptr_, binder_.get(&viennacl::traits::handle(mat)));
// }
static inline void append(char*& p, const char * str)
// /** @brief Implicit matrix mapping */
// template<class NumericT>
// inline result_type operator()(viennacl::implicit_matrix_base<NumericT> const & /*mat*/) const
// {
// *ptr_++='i'; //implicit
// *ptr_++='m'; //matrix
// *ptr_++=tools::first_letter_of_type<NumericT>::value();
// }
inline void append(char*& p, const char * str) const
{
std::size_t n = std::strlen(str);
std::memcpy(p, str, n);
p+=n;
}
inline void operator()(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const
inline void operator()(scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const
{
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
tools::call_on_element(root_node.lhs, *this);
else if (root_node.op.type_family==viennacl::scheduler::OPERATION_BINARY_TYPE_FAMILY && leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
tools::call_on_element(root_node.rhs, *this);
scheduler::statement_node const & root_node = statement.array()[root_idx];
if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
append(root_node.lhs);
else if (root_node.op.type_family==scheduler::OPERATION_BINARY_TYPE_FAMILY && leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
append(root_node.rhs);
else if (leaf_t==PARENT_NODE_TYPE)
append_id(ptr_,root_node.op.type);
}
@@ -485,16 +488,16 @@ private:
char *& ptr_;
};
inline std::string statements_representation(statements_container const & statements, binding_policy_t binding_policy)
inline std::string statements_representation(scheduler::statements_container const & statements, binding_policy_t binding_policy)
{
std::vector<char> program_name_vector(256);
char* program_name = program_name_vector.data();
if (statements.order()==statements_container::INDEPENDENT)
if (statements.order()==scheduler::statements_container::INDEPENDENT)
*program_name++='i';
else
*program_name++='s';
tools::shared_ptr<symbolic_binder> binder = make_binder(binding_policy);
for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
for (scheduler::statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
tools::traverse(*it, it->root(), tools::statement_representation_functor(*binder, program_name),true);
*program_name='\0';
return std::string(program_name_vector.data());

View File

@@ -0,0 +1,17 @@
#ifndef ATIDLAS_VECTOR_HPP_
#define ATIDLAS_VECTOR_HPP_
#include "CL/cl.hpp"
#include "atidlas/forwards.h"
namespace atidlas
{
}
#endif

View File

@@ -0,0 +1,80 @@
#ifndef ATIDLAS_EXPRESSION_TEMPLATE_HPP_
#define ATIDLAS_EXPRESSION_TEMPLATE_HPP_
#include "atidlas/forwards.h"
#include "atidlas/traits/size.hpp"
namespace atidlas
{
namespace detail
{

  /** @brief Chooses how an operand type is held: by reference unless it is one of the built-in scalar types listed below.
  *
  * The primary template yields T&. The specializations generated further down
  * yield the type itself (const-qualified where requested).
  */
  template<typename T>
  struct reference_if_nonscalar
  {
    typedef T & type;
  };

  /** @brief Helper exposing its argument unchanged as 'type'; base of the scalar specializations. */
  template<typename T>
  struct hold_by_value
  {
    typedef T type;
  };

// Generates the by-value specializations for TNAME and const TNAME.
#define ATIDLAS_REFERENCE_IF_NONSCALAR(TNAME) \
  template<> struct reference_if_nonscalar<TNAME> : hold_by_value<TNAME> { }; \
  template<> struct reference_if_nonscalar<const TNAME> : hold_by_value<const TNAME> { };

  ATIDLAS_REFERENCE_IF_NONSCALAR(char)
  ATIDLAS_REFERENCE_IF_NONSCALAR(short)
  ATIDLAS_REFERENCE_IF_NONSCALAR(int)
  ATIDLAS_REFERENCE_IF_NONSCALAR(long)
  ATIDLAS_REFERENCE_IF_NONSCALAR(unsigned char)
  ATIDLAS_REFERENCE_IF_NONSCALAR(unsigned short)
  ATIDLAS_REFERENCE_IF_NONSCALAR(unsigned int)
  ATIDLAS_REFERENCE_IF_NONSCALAR(unsigned long)
  ATIDLAS_REFERENCE_IF_NONSCALAR(float)
  ATIDLAS_REFERENCE_IF_NONSCALAR(double)

#undef ATIDLAS_REFERENCE_IF_NONSCALAR

}
/** @brief An expression template class that represents a binary operation
* @tparam LHS left hand side operand
* @tparam RHS right hand side operand
* @tparam OP the operator
*/
template<typename LHS, typename RHS, typename OP>
class expression_template
{
typedef typename detail::reference_if_nonscalar<LHS>::type lhs_reference_type;
typedef typename detail::reference_if_nonscalar<RHS>::type rhs_reference_type;
public:
expression_template(LHS & l, RHS & r) : lhs_(l), rhs_(r) {}
/** @brief Get left hand side operand */
lhs_reference_type lhs() const { return lhs_; }
/** @brief Get right hand side operand */
rhs_reference_type rhs() const { return rhs_; }
/** @brief Returns the size of the result vector */
atidlas_int_t size() const { return traits::size(*this); }
private:
/** @brief The left hand side operand */
lhs_reference_type lhs_;
/** @brief The right hand side operand */
rhs_reference_type rhs_;
};
/** @brief Expression template tagged as a vector expression.
*
* Adds nothing to expression_template besides a distinct type; both operands
* are forwarded to the base class. Declared with the 'class' key so that the
* definition agrees with the forward declarations of this template (a
* struct/class mismatch triggers compiler warnings).
*/
template<typename LHS, typename RHS, typename OP>
class vector_expression: public expression_template<LHS, RHS, OP>{
public:
  vector_expression(LHS & l, RHS & r) : expression_template<LHS, RHS, OP>(l, r){ }
};
/** @brief Expression template tagged as a matrix expression.
*
* Adds nothing to expression_template besides a distinct type; both operands
* are forwarded to the base class.
*/
template<typename LHS, typename RHS, typename OP>
class matrix_expression: public expression_template<LHS, RHS, OP>{
public:
  // Members of a 'class' are private by default; without this access
  // specifier the only constructor would be unusable from outside.
  matrix_expression(LHS & l, RHS & r) : expression_template<LHS, RHS, OP>(l, r){ }
};
/** @brief Expression template tagged as a scalar expression.
*
* Adds nothing to expression_template besides a distinct type; both operands
* are forwarded to the base class.
*/
template<typename LHS, typename RHS, typename OP>
class scalar_expression: public expression_template<LHS, RHS, OP>{
public:
  // Members of a 'class' are private by default; without this access
  // specifier the only constructor would be unusable from outside.
  scalar_expression(LHS & l, RHS & r) : expression_template<LHS, RHS, OP>(l, r){ }
};
}
#endif

View File

@@ -1,10 +1,285 @@
#ifndef ATIDLAS_FORWARDS_H
#define ATIDLAS_FORWARDS_H
#include "CL/cl.hpp"
namespace atidlas
{
typedef int atidlas_int_t;
/** @brief Runtime tag identifying the element type of a vector or matrix.
*
* INVALID_NUMERIC_TYPE is the zero value. size_of() maps every other
* enumerator to its element size in bytes and rejects INVALID_NUMERIC_TYPE.
*/
enum numeric_type
{
INVALID_NUMERIC_TYPE = 0,
CHAR_TYPE,
UCHAR_TYPE,
SHORT_TYPE,
USHORT_TYPE,
INT_TYPE,
UINT_TYPE,
LONG_TYPE,
ULONG_TYPE,
HALF_TYPE,
FLOAT_TYPE,
DOUBLE_TYPE
};
/** @brief Size in bytes of one element of the given numeric type.
*
* @param type element type tag
* @return 1 for the char types, 2 for the short types and half, 4 for the
*         int types and float, 8 for the long types and double
* @throw const char* (the literal "Unsupported numeric type") for any other
*        value, including INVALID_NUMERIC_TYPE
*/
inline unsigned int size_of(numeric_type type)
{
  if (type==CHAR_TYPE || type==UCHAR_TYPE)
    return 1;

  if (type==SHORT_TYPE || type==USHORT_TYPE || type==HALF_TYPE)
    return 2;

  if (type==INT_TYPE || type==UINT_TYPE || type==FLOAT_TYPE)
    return 4;

  if (type==LONG_TYPE || type==ULONG_TYPE || type==DOUBLE_TYPE)
    return 8;

  // Nothing matched: same failure mode as before (a thrown string literal).
  throw "Unsupported numeric type";
}
template<class LHS, class RHS, class OP>
class vector_expression;
/** @brief Type-erased vector: an OpenCL buffer plus a runtime element type, a size and a start/stride pair.
*
* The element type is carried as a numeric_type value rather than as a template parameter.
*/
class vector_base
{
public:
/** @brief Creates a vector of 'size' elements of type 'dtype' backed by a new read-write buffer in 'context'.
*
* start is set to 0, stride to 1 and the internal size to 'size'. The buffer size in bytes is
* size_of(dtype) times the internal size. The initializer for data_ reads dtype_ and internal_size_,
* so it relies on data_ being declared after those members.
*/
vector_base(atidlas_int_t size, numeric_type dtype, cl::Context context) : dtype_(dtype), size_(size), internal_size_(size), start_(0), stride_(1), context_(context), data_(context, CL_MEM_READ_WRITE, size_of(dtype_)*internal_size()) {}
/** @brief Wraps an existing buffer; the context is queried from the buffer (CL_MEM_CONTEXT) and the internal size is set to 'size'. */
vector_base(cl::Buffer data, atidlas_int_t size, numeric_type dtype, atidlas_int_t start, atidlas_int_t stride): dtype_(dtype), size_(size), internal_size_(size), start_(start), stride_(stride), context_(data.getInfo<CL_MEM_CONTEXT>()), data_(data){ }
/** @brief Element type tag */
numeric_type dtype() const { return dtype_; }
/** @brief Number of elements */
atidlas_int_t size() const { return size_; }
/** @brief Internal size; both constructors set it equal to size() */
atidlas_int_t internal_size() const { return internal_size_; }
/** @brief Start value given at construction (0 for the allocating constructor) */
atidlas_int_t start() const { return start_; }
/** @brief Stride value given at construction (1 for the allocating constructor) */
atidlas_int_t stride() const { return stride_; }
/** @brief Assignment from a vector expression; only declared here, the definition is not part of this header section. */
template<class LHS, class RHS, class OP>
vector_base& operator=(vector_expression<LHS, RHS, OP> const &);
/** @brief OpenCL context associated with the buffer */
cl::Context const & context() const { return context_; }
/** @brief Underlying OpenCL buffer */
cl::Buffer const & data() const { return data_; }
private:
numeric_type dtype_;
atidlas_int_t size_;
atidlas_int_t internal_size_;
atidlas_int_t start_;
atidlas_int_t stride_;
cl::Context context_;
// Declared last on purpose: its initializer in the allocating constructor reads dtype_ and internal_size_.
cl::Buffer data_;
};
/** @brief Type-erased matrix: an OpenCL buffer plus a runtime element type and, per dimension, a size, an internal size, a start and a stride.
*
* The element type is carried as a numeric_type value rather than as a template parameter.
*/
class matrix_base
{
public:
  /** @brief Creates a size1 x size2 matrix of type 'dtype' backed by a new read-write buffer in 'context'.
  *
  * Both starts are 0, both strides are 1 and the internal sizes equal the
  * sizes. The buffer size in bytes is size_of(dtype) times internal_size().
  * The initializer for data_ reads dtype_ and the internal sizes, so it
  * relies on data_ being declared after those members.
  */
  matrix_base(atidlas_int_t size1, atidlas_int_t size2, numeric_type dtype, cl::Context context)
    : dtype_(dtype),
      size1_(size1), internal_size1_(size1), start1_(0), stride1_(1),
      size2_(size2), internal_size2_(size2), start2_(0), stride2_(1),
      context_(context), data_(context, CL_MEM_READ_WRITE, size_of(dtype_)*internal_size()) {}

  /** @brief Wraps an existing buffer; the context is queried from the buffer (CL_MEM_CONTEXT).
  *
  * The internal sizes are set to the given sizes so that internal_size()
  * never reads uninitialized members.
  */
  matrix_base(cl::Buffer data, atidlas_int_t size1, atidlas_int_t start1, atidlas_int_t stride1,
              atidlas_int_t size2, atidlas_int_t start2, atidlas_int_t stride2,
              numeric_type dtype)
    : dtype_(dtype),
      size1_(size1), internal_size1_(size1), start1_(start1), stride1_(stride1),
      size2_(size2), internal_size2_(size2), start2_(start2), stride2_(stride2),
      context_(data.getInfo<CL_MEM_CONTEXT>()), data_(data){ }

  /** @brief Element type tag */
  numeric_type dtype() const { return dtype_; }

  /** @brief Size along the first dimension */
  atidlas_int_t size1() const { return size1_; }
  /** @brief Internal size along the first dimension */
  atidlas_int_t internal_size1() const { return internal_size1_; }
  /** @brief Start along the first dimension */
  atidlas_int_t start1() const { return start1_; }
  /** @brief Stride along the first dimension */
  atidlas_int_t stride1() const { return stride1_; }

  /** @brief Size along the second dimension */
  atidlas_int_t size2() const { return size2_; }
  /** @brief Internal size along the second dimension */
  atidlas_int_t internal_size2() const { return internal_size2_; }
  /** @brief Start along the second dimension */
  atidlas_int_t start2() const { return start2_; }
  /** @brief Stride along the second dimension */
  atidlas_int_t stride2() const { return stride2_; }

  /** @brief Product of the two internal sizes */
  atidlas_int_t internal_size() const { return internal_size1_*internal_size2_; }

  /** @brief OpenCL context associated with the buffer */
  cl::Context const & context() const { return context_; }
  /** @brief Underlying OpenCL buffer */
  cl::Buffer const & data() const { return data_; }

private:
  numeric_type dtype_;
  atidlas_int_t size1_;
  atidlas_int_t internal_size1_;
  atidlas_int_t start1_;
  atidlas_int_t stride1_;
  atidlas_int_t size2_;
  atidlas_int_t internal_size2_;
  atidlas_int_t start2_;
  atidlas_int_t stride2_;
  cl::Context context_;
  // Declared last on purpose: its initializer in the allocating constructor
  // reads dtype_ and both internal sizes.
  cl::Buffer data_;
};
/** @brief A tag class representing assignment */
struct op_assign {};
/** @brief A tag class representing inplace addition */
struct op_inplace_add {};
/** @brief A tag class representing inplace subtraction */
struct op_inplace_sub {};
/** @brief A tag class representing addition */
struct op_add {};
/** @brief A tag class representing subtraction */
struct op_sub {};
/** @brief A tag class representing multiplication by a scalar */
struct op_mult {};
/** @brief A tag class representing matrix-vector products and element-wise multiplications*/
struct op_prod {};
/** @brief A tag class representing matrix-matrix products */
struct op_mat_mat_prod {};
/** @brief A tag class representing division */
struct op_div {};
/** @brief A tag class representing the power function */
struct op_pow {};
/** @brief A tag class representing equality */
struct op_eq {};
/** @brief A tag class representing inequality */
struct op_neq {};
/** @brief A tag class representing greater-than */
struct op_greater {};
/** @brief A tag class representing less-than */
struct op_less {};
/** @brief A tag class representing greater-than-or-equal-to */
struct op_geq {};
/** @brief A tag class representing less-than-or-equal-to */
struct op_leq {};
/** @brief A tag class wrapping an operation tag T; scheduler::result_of::op_type_info keeps T's id and reports the vector-reduction family for it */
template<class T>
struct op_reduce_vector{ };
/** @brief A tag class wrapping an operation tag T; scheduler::result_of::op_type_info keeps T's id and reports the rows-reduction family for it */
template<class T>
struct op_reduce_rows{ };
/** @brief A tag class wrapping an operation tag T; scheduler::result_of::op_type_info keeps T's id and reports the columns-reduction family for it */
template<class T>
struct op_reduce_columns{ };
/** @brief A tag class representing element-wise casting operations on vectors and matrices */
template<typename OP>
struct op_element_cast {};
/** @brief A tag class representing element-wise binary operations (like multiplication) on vectors or matrices */
template<typename OP>
struct op_element_binary {};
/** @brief A tag class representing element-wise unary operations (like sin()) on vectors or matrices */
template<typename OP>
struct op_element_unary {};
/** @brief A tag class representing the modulus function for integers */
struct op_abs {};
/** @brief A tag class representing the acos() function */
struct op_acos {};
/** @brief A tag class representing the asin() function */
struct op_asin {};
/** @brief A tag class for representing the argmax() function */
struct op_argmax {};
/** @brief A tag class for representing the argmin() function */
struct op_argmin {};
/** @brief A tag class representing the atan() function */
struct op_atan {};
/** @brief A tag class representing the atan2() function */
struct op_atan2 {};
/** @brief A tag class representing the ceil() function */
struct op_ceil {};
/** @brief A tag class representing the cos() function */
struct op_cos {};
/** @brief A tag class representing the cosh() function */
struct op_cosh {};
/** @brief A tag class representing the exp() function */
struct op_exp {};
/** @brief A tag class representing the fabs() function */
struct op_fabs {};
/** @brief A tag class representing the fdim() function */
struct op_fdim {};
/** @brief A tag class representing the floor() function */
struct op_floor {};
/** @brief A tag class representing the fmax() function */
struct op_fmax {};
/** @brief A tag class representing the fmin() function */
struct op_fmin {};
/** @brief A tag class representing the fmod() function */
struct op_fmod {};
/** @brief A tag class representing the log() function */
struct op_log {};
/** @brief A tag class representing the log10() function */
struct op_log10 {};
/** @brief A tag class representing the sin() function */
struct op_sin {};
/** @brief A tag class representing the sinh() function */
struct op_sinh {};
/** @brief A tag class representing the sqrt() function */
struct op_sqrt {};
/** @brief A tag class representing the tan() function */
struct op_tan {};
/** @brief A tag class representing the tanh() function */
struct op_tanh {};
/** @brief A tag class representing the (off-)diagonal of a matrix */
struct op_matrix_diag {};
/** @brief A tag class representing a matrix given by a vector placed on a certain (off-)diagonal */
struct op_vector_diag {};
/** @brief A tag class representing the extraction of a matrix row to a vector */
struct op_row {};
/** @brief A tag class representing the extraction of a matrix column to a vector */
struct op_column {};
/** @brief A tag class representing inner products of two vectors */
struct op_inner_prod {};
/** @brief A tag class representing the 1-norm of a vector */
struct op_norm_1 {};
/** @brief A tag class representing the 2-norm of a vector */
struct op_norm_2 {};
/** @brief A tag class representing the inf-norm of a vector */
struct op_norm_inf {};
/** @brief A tag class representing the maximum of a vector */
struct op_max {};
/** @brief A tag class representing the minimum of a vector */
struct op_min {};
/** @brief A tag class representing the Frobenius-norm of a matrix */
struct op_norm_frobenius {};
/** @brief A tag class representing transposed matrices */
struct op_trans {};
/** @brief A tag class representing sign flips (for scalars only. Vectors and matrices use the standard multiplication by the scalar -1.0) */
struct op_flip_sign {};
template<typename LHS, typename RHS, typename OP>
class vector_expression;
template<typename LHS, typename RHS, typename OP>
class matrix_expression;
template<typename LHS, typename RHS, typename OP>
class scalar_expression;
}
#endif

View File

@@ -3,13 +3,11 @@
#include "rapidjson/document.h"
#include "viennacl/ocl/program.hpp"
#include "viennacl/tools/timer.hpp"
#include "atidlas/backend/templates/template_base.hpp"
#include "atidlas/model/tools.hpp"
#include "atidlas/tools/shared_ptr.hpp"
#include "atidlas/tools/lazy_program_compiler.hpp"
#include "atidlas/backend/templates/template_base.hpp"
#include "atidlas/tools/timer.hpp"
namespace atidlas
{
@@ -107,7 +105,7 @@ namespace atidlas
templates_(1,tp.clone()), context_(context), device_(device)
{}
void execute(statements_container const & statements, bool bypass_predictor = false, bool force_recompilation = false)
void execute(scheduler::statements_container const & statements, bool bypass_predictor = false, bool force_recompilation = false)
{
bypass_predictor = bypass_predictor || predictor_.get()==NULL;
@@ -147,11 +145,11 @@ namespace atidlas
templates_[label]->enqueue("k" + tools::to_string(label), lazy_programs_, statements);
}
void tune(statements_container const & statements)
void tune(scheduler::statements_container const & statements)
{
//Collect the timings
std::vector<float> timings(templates_.size());
viennacl::tools::timer timer;
tools::timer timer;
for(size_t i = 0 ; i < templates_.size() ; ++i)
{
timer.start();

View File

@@ -0,0 +1,424 @@
#ifndef ATIDLAS_SCHEDULER_STATEMENT_HPP
#define ATIDLAS_SCHEDULER_STATEMENT_HPP
#include "atidlas/forwards.h"
#include "atidlas/tools/predicate.hpp"
#include "atidlas/tools/enable_if.hpp"
#include <list>
#include <vector>
namespace atidlas
{
namespace scheduler
{
/** @brief Optimization enum for grouping operations into unary or binary operations. Just for optimization of lookups.
*
* Besides the unary/binary split there is one family per kind of reduction.
* result_of::op_type_info assigns a family to every supported operation tag.
*/
enum operation_node_type_family
{
OPERATION_INVALID_TYPE_FAMILY = 0,
// unary or binary expression
OPERATION_UNARY_TYPE_FAMILY,
OPERATION_BINARY_TYPE_FAMILY,
//reductions
// (assigned by op_type_info to op_reduce_vector<>, op_reduce_rows<> and op_reduce_columns<> respectively)
OPERATION_VECTOR_REDUCTION_TYPE_FAMILY,
OPERATION_ROWS_REDUCTION_TYPE_FAMILY,
OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY
};
/** @brief Enumeration for identifying the possible operations */
enum operation_node_type
{
OPERATION_INVALID_TYPE = 0,
// unary operator
OPERATION_UNARY_MINUS_TYPE,
// unary expression
OPERATION_UNARY_CAST_CHAR_TYPE,
OPERATION_UNARY_CAST_UCHAR_TYPE,
OPERATION_UNARY_CAST_SHORT_TYPE,
OPERATION_UNARY_CAST_USHORT_TYPE,
OPERATION_UNARY_CAST_INT_TYPE,
OPERATION_UNARY_CAST_UINT_TYPE,
OPERATION_UNARY_CAST_LONG_TYPE,
OPERATION_UNARY_CAST_ULONG_TYPE,
OPERATION_UNARY_CAST_HALF_TYPE,
OPERATION_UNARY_CAST_FLOAT_TYPE,
OPERATION_UNARY_CAST_DOUBLE_TYPE,
OPERATION_UNARY_ABS_TYPE,
OPERATION_UNARY_ACOS_TYPE,
OPERATION_UNARY_ASIN_TYPE,
OPERATION_UNARY_ATAN_TYPE,
OPERATION_UNARY_CEIL_TYPE,
OPERATION_UNARY_COS_TYPE,
OPERATION_UNARY_COSH_TYPE,
OPERATION_UNARY_EXP_TYPE,
OPERATION_UNARY_FABS_TYPE,
OPERATION_UNARY_FLOOR_TYPE,
OPERATION_UNARY_LOG_TYPE,
OPERATION_UNARY_LOG10_TYPE,
OPERATION_UNARY_SIN_TYPE,
OPERATION_UNARY_SINH_TYPE,
OPERATION_UNARY_SQRT_TYPE,
OPERATION_UNARY_TAN_TYPE,
OPERATION_UNARY_TANH_TYPE,
OPERATION_UNARY_TRANS_TYPE,
OPERATION_UNARY_NORM_1_TYPE,
OPERATION_UNARY_NORM_2_TYPE,
OPERATION_UNARY_NORM_INF_TYPE,
OPERATION_UNARY_MAX_TYPE,
OPERATION_UNARY_MIN_TYPE,
// binary expression
OPERATION_BINARY_ACCESS_TYPE,
OPERATION_BINARY_ASSIGN_TYPE,
OPERATION_BINARY_INPLACE_ADD_TYPE,
OPERATION_BINARY_INPLACE_SUB_TYPE,
OPERATION_BINARY_ADD_TYPE,
OPERATION_BINARY_SUB_TYPE,
OPERATION_BINARY_MULT_TYPE,
OPERATION_BINARY_DIV_TYPE,
OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE,
OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE,
OPERATION_BINARY_ELEMENT_ARGMAX_TYPE,
OPERATION_BINARY_ELEMENT_ARGMIN_TYPE,
OPERATION_BINARY_ELEMENT_PROD_TYPE,
OPERATION_BINARY_ELEMENT_DIV_TYPE,
OPERATION_BINARY_ELEMENT_EQ_TYPE,
OPERATION_BINARY_ELEMENT_NEQ_TYPE,
OPERATION_BINARY_ELEMENT_GREATER_TYPE,
OPERATION_BINARY_ELEMENT_GEQ_TYPE,
OPERATION_BINARY_ELEMENT_LESS_TYPE,
OPERATION_BINARY_ELEMENT_LEQ_TYPE,
OPERATION_BINARY_ELEMENT_POW_TYPE,
OPERATION_BINARY_ELEMENT_FMAX_TYPE,
OPERATION_BINARY_ELEMENT_FMIN_TYPE,
OPERATION_BINARY_ELEMENT_MAX_TYPE,
OPERATION_BINARY_ELEMENT_MIN_TYPE,
OPERATION_BINARY_MATRIX_DIAG_TYPE,
OPERATION_BINARY_VECTOR_DIAG_TYPE,
OPERATION_BINARY_MATRIX_ROW_TYPE,
OPERATION_BINARY_MATRIX_COLUMN_TYPE,
OPERATION_BINARY_MAT_VEC_PROD_TYPE,
OPERATION_BINARY_MAT_MAT_PROD_TYPE,
OPERATION_BINARY_INNER_PROD_TYPE
};
namespace result_of
{
/** @brief Maps an operation tag type to its operation_node_type ('id') and operation_node_type_family ('family').
*
* This primary template is the fallback for tags without a specialization. It
* defines neither 'id' nor 'family'; its only member names
* T::ERROR_UNKNOWN_OP_TYPE, so instantiating it with a tag that lacks such a
* nested type fails to compile with that name in the diagnostic.
*/
template<typename T>
struct op_type_info
{
typedef typename T::ERROR_UNKNOWN_OP_TYPE error_type;
};
/** \cond */
// elementwise casts
template<> struct op_type_info<op_element_cast<char> > { enum { id = OPERATION_UNARY_CAST_CHAR_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_cast<unsigned char> > { enum { id = OPERATION_UNARY_CAST_UCHAR_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_cast<short> > { enum { id = OPERATION_UNARY_CAST_SHORT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_cast<unsigned short> > { enum { id = OPERATION_UNARY_CAST_USHORT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_cast<int> > { enum { id = OPERATION_UNARY_CAST_INT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_cast<unsigned int> > { enum { id = OPERATION_UNARY_CAST_UINT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_cast<long> > { enum { id = OPERATION_UNARY_CAST_LONG_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_cast<unsigned long> > { enum { id = OPERATION_UNARY_CAST_ULONG_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_cast<float> > { enum { id = OPERATION_UNARY_CAST_FLOAT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_cast<double> > { enum { id = OPERATION_UNARY_CAST_DOUBLE_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
// elementwise functions
template<> struct op_type_info<op_element_unary<op_abs> > { enum { id = OPERATION_UNARY_ABS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_acos> > { enum { id = OPERATION_UNARY_ACOS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_asin> > { enum { id = OPERATION_UNARY_ASIN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_atan> > { enum { id = OPERATION_UNARY_ATAN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_ceil> > { enum { id = OPERATION_UNARY_CEIL_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_cos> > { enum { id = OPERATION_UNARY_COS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_cosh> > { enum { id = OPERATION_UNARY_COSH_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_exp> > { enum { id = OPERATION_UNARY_EXP_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_fabs> > { enum { id = OPERATION_UNARY_FABS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_floor> > { enum { id = OPERATION_UNARY_FLOOR_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_log> > { enum { id = OPERATION_UNARY_LOG_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_log10> > { enum { id = OPERATION_UNARY_LOG10_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_sin> > { enum { id = OPERATION_UNARY_SIN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_sinh> > { enum { id = OPERATION_UNARY_SINH_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_sqrt> > { enum { id = OPERATION_UNARY_SQRT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_tan> > { enum { id = OPERATION_UNARY_TAN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_unary<op_tanh> > { enum { id = OPERATION_UNARY_TANH_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_binary<op_argmax> > { enum { id = OPERATION_BINARY_ELEMENT_ARGMAX_TYPE , family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_binary<op_argmin> > { enum { id = OPERATION_BINARY_ELEMENT_ARGMIN_TYPE , family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_binary<op_pow> > { enum { id = OPERATION_BINARY_ELEMENT_POW_TYPE , family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_binary<op_eq> > { enum { id = OPERATION_BINARY_ELEMENT_EQ_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_binary<op_neq> > { enum { id = OPERATION_BINARY_ELEMENT_NEQ_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_binary<op_greater> > { enum { id = OPERATION_BINARY_ELEMENT_GREATER_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_binary<op_less> > { enum { id = OPERATION_BINARY_ELEMENT_LESS_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_binary<op_geq> > { enum { id = OPERATION_BINARY_ELEMENT_GEQ_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_binary<op_leq> > { enum { id = OPERATION_BINARY_ELEMENT_LEQ_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_binary<op_fmax> > { enum { id = OPERATION_BINARY_ELEMENT_FMAX_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_binary<op_fmin> > { enum { id = OPERATION_BINARY_ELEMENT_FMIN_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
//structurewise function
template<> struct op_type_info<op_norm_1 > { enum { id = OPERATION_UNARY_NORM_1_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_norm_2 > { enum { id = OPERATION_UNARY_NORM_2_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_norm_inf > { enum { id = OPERATION_UNARY_NORM_INF_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_max > { enum { id = OPERATION_UNARY_MAX_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_min > { enum { id = OPERATION_UNARY_MIN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_trans > { enum { id = OPERATION_UNARY_TRANS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_row > { enum { id = OPERATION_BINARY_MATRIX_ROW_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_column > { enum { id = OPERATION_BINARY_MATRIX_COLUMN_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_matrix_diag> { enum { id = OPERATION_BINARY_MATRIX_DIAG_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_vector_diag> { enum { id = OPERATION_BINARY_VECTOR_DIAG_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_prod> { enum { id = OPERATION_BINARY_MAT_VEC_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_mat_mat_prod> { enum { id = OPERATION_BINARY_MAT_MAT_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_inner_prod> { enum { id = OPERATION_BINARY_INNER_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<typename OP> struct op_type_info<op_reduce_vector<OP> > { enum { id = op_type_info<OP>::id, family = OPERATION_VECTOR_REDUCTION_TYPE_FAMILY}; };
template<typename OP> struct op_type_info<op_reduce_rows<OP> > { enum { id = op_type_info<OP>::id, family = OPERATION_ROWS_REDUCTION_TYPE_FAMILY}; };
template<typename OP> struct op_type_info<op_reduce_columns<OP> > { enum { id = op_type_info<OP>::id, family = OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY}; };
//elementwise operator
template<> struct op_type_info<op_assign> { enum { id = OPERATION_BINARY_ASSIGN_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_inplace_add> { enum { id = OPERATION_BINARY_INPLACE_ADD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_inplace_sub> { enum { id = OPERATION_BINARY_INPLACE_SUB_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_add> { enum { id = OPERATION_BINARY_ADD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_sub> { enum { id = OPERATION_BINARY_SUB_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_binary<op_prod> > { enum { id = OPERATION_BINARY_ELEMENT_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_element_binary<op_div> > { enum { id = OPERATION_BINARY_ELEMENT_DIV_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_mult> { enum { id = OPERATION_BINARY_MULT_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_div> { enum { id = OPERATION_BINARY_DIV_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
template<> struct op_type_info<op_flip_sign> { enum { id = OPERATION_UNARY_MINUS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
/** \endcond */
} // namespace result_of
/** @brief Groups the type of a node in the statement tree. Used for faster dispatching
*
* Stored in lhs_rhs_element::type_family. COMPOSITE_OPERATION_FAMILY marks an
* operand that is itself an expression, referenced through its node index.
*/
enum statement_node_type_family
{
INVALID_TYPE_FAMILY = 0,
// LHS or RHS are again an expression:
COMPOSITE_OPERATION_FAMILY,
// device scalars:
SCALAR_TYPE_FAMILY,
// vector:
VECTOR_TYPE_FAMILY,
// matrices:
MATRIX_TYPE_FAMILY
};
/** @brief Encodes the type of a node in the statement tree.
*
* Stored in lhs_rhs_element::subtype. INVALID_SUBTYPE is the zero value.
* The enumerator list deliberately has no trailing comma: C++03 rejects one,
* and the other enums in this header do not use one either.
*/
enum statement_node_subtype
{
  INVALID_SUBTYPE = 0,
  HOST_SCALAR_TYPE,
  DEVICE_SCALAR_TYPE,
  DENSE_VECTOR_TYPE,
  IMPLICIT_VECTOR_TYPE,
  DENSE_MATRIX_TYPE,
  IMPLICIT_MATRIX_TYPE
};
/** @brief A class representing the 'data' for the LHS or RHS operand of the respective node.
*
* If it represents a compound expression, the union holds the array index (node_index) within the respective statement array.
* If it represents an object with data, the union holds a pointer to that object (a vector_base or a matrix_base).
*
* 'type_family' and 'subtype' describe the operand and thereby tell which union member is the active one;
* 'numeric_t' carries a numeric_type tag for the operand.
* NOTE(review): the union has no member for scalar operands although scalar families/subtypes exist -- confirm how those are meant to be stored.
*/
struct lhs_rhs_element
{
statement_node_type_family type_family;
statement_node_subtype subtype;
numeric_type numeric_t;
union
{
// active when the operand is itself an expression (COMPOSITE_OPERATION_FAMILY)
unsigned int node_index;
// active for dense vector operands (DENSE_VECTOR_TYPE)
atidlas::vector_base * vector;
// presumably active for matrix operands -- verify against the code that fills this struct
atidlas::matrix_base * matrix;
};
};
/** @brief Struct for holding the type family as well as the type of an operation (could be addition, subtraction, norm, etc.) */
struct op_element
{
operation_node_type_family type_family;
operation_node_type type;
};
/** @brief Main data structure for a node in the statement tree */
struct statement_node
{
lhs_rhs_element lhs;
op_element op;
lhs_rhs_element rhs;
};
namespace result_of
{
/** @brief Compile-time count of the statement nodes needed to represent an expression type.
*
* Any type that is not one of the expression templates below counts as 0.
* A vector, matrix or scalar expression (const-qualified or not) counts as 1
* plus the counts of its LHS, RHS and OP types. The statement constructor
* taking an expression uses 1 + num_nodes<RHS>::value as the size of its node array.
*/
template<class T> struct num_nodes { enum { value = 0 }; };
template<class LHS, class OP, class RHS> struct num_nodes< vector_expression<LHS, RHS, OP> > { enum { value = 1 + num_nodes<LHS>::value + num_nodes<RHS>::value + num_nodes<OP>::value }; };
template<class LHS, class OP, class RHS> struct num_nodes< const vector_expression<LHS, RHS, OP> > { enum { value = 1 + num_nodes<LHS>::value + num_nodes<RHS>::value + num_nodes<OP>::value }; };
template<class LHS, class OP, class RHS> struct num_nodes< matrix_expression<LHS, RHS, OP> > { enum { value = 1 + num_nodes<LHS>::value + num_nodes<RHS>::value + num_nodes<OP>::value }; };
template<class LHS, class OP, class RHS> struct num_nodes< const matrix_expression<LHS, RHS, OP> > { enum { value = 1 + num_nodes<LHS>::value + num_nodes<RHS>::value + num_nodes<OP>::value }; };
template<class LHS, class OP, class RHS> struct num_nodes< scalar_expression<LHS, RHS, OP> > { enum { value = 1 + num_nodes<LHS>::value + num_nodes<RHS>::value + num_nodes<OP>::value }; };
template<class LHS, class OP, class RHS> struct num_nodes< const scalar_expression<LHS, RHS, OP> > { enum { value = 1 + num_nodes<LHS>::value + num_nodes<RHS>::value + num_nodes<OP>::value }; };
}
/** \brief The main class for representing a statement such as x = inner_prod(y,z); at runtime.
 *
 * This is the equivalent to an expression template tree, but built entirely at runtime, which enables
 * optimizations such as kernel fusion.
 *
 * The tree is stored as a flat array of statement_node. The root is at index 0; an operand that is itself an
 * expression refers to its node through lhs_rhs_element::node_index.
 */
class statement
{
public:
  typedef statement_node           value_type;
  typedef std::vector<value_type>  container_type;

  /** @brief Creates a statement from an already assembled node array (the array is copied). */
  statement(container_type const & custom_array) : array_(custom_array) {}

  /** @brief Generate the runtime statement from an expression template.
   *
   * The node array is sized up front as 1 + num_nodes<RHS>, so only a single allocation is needed.
   * Both operands of the root are filled starting at free slot 1; the sizing therefore only accounts
   * for nodes contributed by the right-hand side.
   *
   * @param lhs  Left operand of the root operation
   * @param rhs  Right operand of the root operation (terminal or expression template)
   */
  template<typename LHS, typename OP, typename RHS>
  statement(LHS & lhs, OP const &, RHS const & rhs) : array_(1 + result_of::num_nodes<RHS>::value)
  {
    array_[0].op.type_family = operation_node_type_family(result_of::op_type_info<OP>::family);
    array_[0].op.type        = operation_node_type(result_of::op_type_info<OP>::id);
    add_lhs(0, 1, lhs);
    add_rhs(0, 1, rhs);
  }

  /** @brief Read access to the flat node array. */
  container_type const & array() const { return array_; }

  /** @brief Index of the root node within array(). */
  unsigned int root() const { return 0; }

private:
  //////////// Terminals ////////////////////

  /** @brief Fills an operand slot with a dense vector. Consumes no node, hence returns next_free unchanged. */
  unsigned int add_element(unsigned int next_free, lhs_rhs_element & elem, vector_base const & x)
  {
    elem.type_family = VECTOR_TYPE_FAMILY;
    elem.subtype     = DENSE_VECTOR_TYPE;
    // NOTE(review): numeric_t is not set for terminals here - confirm whether it should be taken from x.
    // The operand is stored through a non-const pointer even though it is received as const.
    elem.vector      = const_cast<vector_base*>(&x);
    return next_free;
  }

  //////////// Tree nodes (non-terminals) ////////////////////

  /** @brief Shared implementation for operands that are themselves expressions.
   *
   * Marks the operand as composite, points it at the node slot 'next_free' and recursively fills that node.
   * numeric_t is reset for every expression kind so that scalar, vector and matrix expressions are handled alike.
   *
   * @return The next free node index after the sub-tree has been added
   */
  template<typename ExpressionT>
  unsigned int add_composite(unsigned int next_free, lhs_rhs_element & elem, ExpressionT const & t)
  {
    elem.type_family = COMPOSITE_OPERATION_FAMILY;
    elem.subtype     = INVALID_SUBTYPE;
    elem.numeric_t   = INVALID_NUMERIC_TYPE;
    elem.node_index  = next_free;
    return add_node(next_free, next_free + 1, t);
  }

  template<typename LHS, typename RHS, typename OP>
  unsigned int add_element(unsigned int next_free,
                           lhs_rhs_element & elem,
                           scalar_expression<LHS, RHS, OP> const & t)
  {
    return add_composite(next_free, elem, t);
  }

  template<typename LHS, typename RHS, typename OP>
  unsigned int add_element(unsigned int next_free,
                           lhs_rhs_element & elem,
                           vector_expression<LHS, RHS, OP> const & t)
  {
    return add_composite(next_free, elem, t);
  }

  template<typename LHS, typename RHS, typename OP>
  unsigned int add_element(unsigned int next_free,
                           lhs_rhs_element & elem,
                           matrix_expression<LHS, RHS, OP> const & t)
  {
    return add_composite(next_free, elem, t);
  }

  /** @brief Fills the left operand of node 'current_index'; returns the next free node index. */
  template<typename T>
  unsigned int add_lhs(unsigned int current_index, unsigned int next_free, T const & t)
  { return add_element(next_free, array_[current_index].lhs, t); }

  /** @brief Fills the right operand of node 'current_index'; returns the next free node index. */
  template<typename T>
  unsigned int add_rhs(unsigned int current_index, unsigned int next_free, T const & t)
  { return add_element(next_free, array_[current_index].rhs, t); }

  /** @brief Fills node 'current_index' from an expression template and recurses into its operands.
   *
   * @return The next free node index after the whole sub-tree has been added
   */
  template<template<typename, typename, typename> class ExpressionT, typename LHS, typename RHS, typename OP>
  unsigned int add_node(unsigned int current_index, unsigned int next_free, ExpressionT<LHS, RHS, OP> const & proxy)
  {
    // set OP:
    array_[current_index].op.type_family = operation_node_type_family(result_of::op_type_info<OP>::family);
    array_[current_index].op.type        = operation_node_type(result_of::op_type_info<OP>::id);

    // set LHS and RHS:
    if (array_[current_index].op.type_family == OPERATION_UNARY_TYPE_FAMILY)
    {
      // unary expression: set rhs to invalid:
      array_[current_index].rhs.type_family = INVALID_TYPE_FAMILY;
      array_[current_index].rhs.subtype     = INVALID_SUBTYPE;
      array_[current_index].rhs.numeric_t   = INVALID_NUMERIC_TYPE;
      return add_lhs(current_index, next_free, proxy.lhs());
    }

    // binary expression: the right sub-tree starts where the left sub-tree stopped
    unsigned int const after_lhs = add_lhs(current_index, next_free, proxy.lhs());
    return add_rhs(current_index, after_lhs, proxy.rhs());
  }

  container_type array_;
};
/** @brief A list of statements together with an order_type tag supplied by the creator. */
class statements_container
{
public:
  typedef std::list<scheduler::statement> data_type;
  enum order_type { SEQUENTIAL, INDEPENDENT };

  /** @brief Copies an existing list of statements and stores the given order tag. */
  statements_container(data_type const & data, order_type order)
    : data_(data), order_(order)
  {}

  /** @brief Container holding a single statement; the order tag is INDEPENDENT. */
  statements_container(scheduler::statement const & s0)
    : data_(1, s0), order_(INDEPENDENT)
  {}

  /** @brief Container holding two statements, s0 followed by s1, with the given order tag. */
  statements_container(scheduler::statement const & s0, scheduler::statement const & s1, order_type order)
    : data_(1, s0), order_(order)
  {
    data_.push_back(s1);
  }

  /** @brief Read access to the stored statements. */
  data_type const & data() const { return data_; }

  /** @brief The order tag passed at construction. */
  order_type order() const { return order_; }

private:
  data_type  data_;
  order_type order_;
};
} // namespace scheduler
} // namespace viennacl
#endif

View File

@@ -0,0 +1,43 @@
#ifndef VIENNACL_META_ENABLE_IF_HPP_
#define VIENNACL_META_ENABLE_IF_HPP_
/* =========================================================================
Copyright (c) 2010-2014, Institute for Microelectronics,
Institute for Analysis and Scientific Computing,
TU Wien.
Portions of this software are copyright by UChicago Argonne, LLC.
-----------------
ViennaCL - The Vienna Computing Library
-----------------
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
(A list of authors and contributors can be found in the PDF manual)
License: MIT (X11), see file LICENSE in the base directory
============================================================================= */
/** @file viennacl/meta/enable_if.hpp
@brief Simple enable-if variant that uses the SFINAE pattern
*/
namespace viennacl
{
  /** @brief Simple enable-if variant that uses the SFINAE pattern
   *
   * The nested typedef 'type' (equal to T, void by default) exists only when the condition b is true.
   * For a false condition the struct is empty, so referring to 'type' is a substitution failure.
   */
  template<bool b, class T = void>
  struct enable_if {};

  /** \cond */
  template<class T>
  struct enable_if<true, T>
  {
    typedef T type;
  };
  /** \endcond */
} //namespace viennacl
#endif

View File

@@ -1,77 +0,0 @@
#ifndef ATIDLAS_EXECUTION_HANDLER_HPP
#define ATIDLAS_EXECUTION_HANDLER_HPP
#include <map>
#include "viennacl/tools/shared_ptr.hpp"
#include "atidlas/tools/lazy_program_compiler.hpp"
#include "atidlas/backend/templates/template_base.hpp"
namespace atidlas
{
class execution_handler
{
public:
typedef std::map< std::string, tools::shared_ptr<template_base> > container_type;
private:
std::string append_prefix(std::string const & str)
{
return "_" + str;
}
std::string define_extension(std::string const & ext)
{
// Note: On devices without double precision support, 'ext' is an empty string.
return (ext.length() > 1) ? std::string("#pragma OPENCL EXTENSION " + ext + " : enable\n") : std::string("\n");
}
void init_program_compiler(std::string const & name, bool force_recompilation)
{
lazy_programs_.push_back(lazy_program_compiler(&ctx_, name, force_recompilation));
lazy_programs_.back().add(define_extension(device_.double_support_extension()));
}
public:
execution_handler(std::string const & program_name_base, viennacl::ocl::context & ctx, viennacl::ocl::device const & device, bool force_recompilation = false) : ctx_(ctx), device_(device), program_names_(2), init_done_(false)
{
lazy_programs_.reserve(2);
init_program_compiler(program_name_base + "_0", force_recompilation);
init_program_compiler(program_name_base + "_1", force_recompilation);
}
void add(std::string const & key, template_base const & T, statements_container const & statements)
{
if (kernels_.insert(container_type::value_type(key, T.clone())).second)
{
std::vector<std::string> sources = kernels_.at(key)->generate(append_prefix(key), statements, device_);
assert(sources.size()<=2);
for (unsigned int i = 0; i < sources.size(); ++i)
lazy_programs_[i].add(sources[i]);
}
}
template_base * template_of(std::string const & key)
{
return kernels_.at(key).get();
}
void execute(container_type::key_type const & key, statements_container const & statements)
{
tools::shared_ptr<template_base> & template_pointer = kernels_.at(key);
template_pointer->enqueue(append_prefix(key), lazy_programs_, statements);
}
private:
viennacl::ocl::context & ctx_;
viennacl::ocl::device const & device_;
container_type kernels_;
std::vector<std::string> program_names_;
std::vector<lazy_program_compiler> lazy_programs_;
bool init_done_;
};
}
#endif

View File

@@ -2,7 +2,7 @@
#define ATIDLAS_LAZY_PROGRAM_COMPILER_HPP
#include <map>
#include "viennacl/ocl/context.hpp"
#include <CL/cl.hpp>
namespace atidlas
{
@@ -11,29 +11,29 @@ namespace atidlas
{
public:
// NOTE(review): this span looks like the removed (viennacl::ocl::*) and added (cl::*) sides of a diff hunk
// rendered without +/- markers, so several declarations appear twice - confirm against the real header.
lazy_program_compiler(viennacl::ocl::context * ctx, std::string const & name, std::string const & src, bool force_recompilation) : ctx_(ctx), program_(NULL), name_(name), src_(src), force_recompilation_(force_recompilation){ }
lazy_program_compiler(viennacl::ocl::context * ctx, std::string const & name, bool force_recompilation) : ctx_(ctx), program_(NULL), name_(name), force_recompilation_(force_recompilation){ }
lazy_program_compiler(cl::Context * ctx, std::string const & name, std::string const & src, bool force_recompilation) : ctx_(ctx), program_(NULL), name_(name), src_(src), force_recompilation_(force_recompilation){ }
lazy_program_compiler(cl::Context * ctx, std::string const & name, bool force_recompilation) : ctx_(ctx), program_(NULL), name_(name), force_recompilation_(force_recompilation){ }
// Appends src to the accumulated program source.
void add(std::string const & src) { src_+=src; }
// Read access to the accumulated program source.
std::string const & src() const { return src_; }
// Returns the program, building it on first use: the viennacl variant below registers src_ under name_ in the
// context (dropping an existing program first when force_recompilation_ is set) and caches the pointer in program_.
viennacl::ocl::program & program()
cl::Program & program()
{
if(program_==NULL)
{
if (force_recompilation_ && ctx_->has_program(name_))
ctx_->delete_program(name_);
if (!ctx_->has_program(name_))
ctx_->add_program(src_, name_);
program_ = &ctx_->get_program(name_);
}
return *program_;
// NOTE(review): in the cl:: variant the entire body below is commented out, which would leave program()
// without a return statement (undefined behavior for a non-void function) - TODO restore or reimplement.
// if(program_==NULL)
// {
// if (force_recompilation_ && ctx_->has_program(name_))
// ctx_->delete_program(name_);
// if (!ctx_->has_program(name_))
// ctx_->add_program(src_, name_);
// program_ = &ctx_->get_program(name_);
// }
// return *program_;
}
private:
// Context the program belongs to (not owned).
viennacl::ocl::context * ctx_;
// Cached program; NULL until program() has been called.
viennacl::ocl::program * program_;
cl::Context * ctx_;
cl::Program * program_;
// Name under which the program is registered in the context.
std::string name_;
// Accumulated source code.
std::string src_;
// Whether an already registered program of the same name is replaced.
bool force_recompilation_;

View File

@@ -0,0 +1,47 @@
#ifndef ATIDLAS_PREDICATE_HPP_
#define ATIDLAS_PREDICATE_HPP_
namespace atidlas
{
/** @brief Helper class for checking whether a type is a primitive type.
 *
 * 'value' is true exactly for the arithmetic types specialized below and false for every other type.
 */
template<class T>
struct is_primitive_type
{
  enum { value = false };
};

// floating point
template<>
struct is_primitive_type<float>          { enum { value = true }; };
template<>
struct is_primitive_type<double>         { enum { value = true }; };

// signed integers
template<>
struct is_primitive_type<char>           { enum { value = true }; };
template<>
struct is_primitive_type<short>          { enum { value = true }; };
template<>
struct is_primitive_type<int>            { enum { value = true }; };
template<>
struct is_primitive_type<long>           { enum { value = true }; };

// unsigned integers
template<>
struct is_primitive_type<unsigned char>  { enum { value = true }; };
template<>
struct is_primitive_type<unsigned short> { enum { value = true }; };
template<>
struct is_primitive_type<unsigned int>   { enum { value = true }; };
template<>
struct is_primitive_type<unsigned long>  { enum { value = true }; };
/** @brief Helper class for checking whether a particular type is a native OpenCL type.
 *
 * 'value' is true exactly for the cl_* scalar types specialized below and false for every other type.
 */
template<class T>
struct is_cl_type
{
  enum { value = false };
};

// floating point
template<>
struct is_cl_type<cl_float>  { enum { value = true }; };
template<>
struct is_cl_type<cl_double> { enum { value = true }; };

// signed integers
template<>
struct is_cl_type<cl_char>   { enum { value = true }; };
template<>
struct is_cl_type<cl_short>  { enum { value = true }; };
template<>
struct is_cl_type<cl_int>    { enum { value = true }; };
template<>
struct is_cl_type<cl_long>   { enum { value = true }; };

// unsigned integers
template<>
struct is_cl_type<cl_uchar>  { enum { value = true }; };
template<>
struct is_cl_type<cl_ushort> { enum { value = true }; };
template<>
struct is_cl_type<cl_uint>   { enum { value = true }; };
template<>
struct is_cl_type<cl_ulong>  { enum { value = true }; };
/** @brief Helper class for checking whether a particular type is a floating point type.
 *
 * 'value' is true for float and double only.
 */
template<class T>
struct is_floating_point
{
  enum { value = false };
};

template<>
struct is_floating_point<float>
{
  enum { value = true };
};

template<>
struct is_floating_point<double>
{
  enum { value = true };
};
}
#endif

123
atidlas/tools/timer.hpp Normal file
View File

@@ -0,0 +1,123 @@
#ifndef _VIENNACL_TOOLS_TIMER_HPP_
#define _VIENNACL_TOOLS_TIMER_HPP_
/* =========================================================================
Copyright (c) 2010-2014, Institute for Microelectronics,
Institute for Analysis and Scientific Computing,
TU Wien.
Portions of this software are copyright by UChicago Argonne, LLC.
-----------------
ViennaCL - The Vienna Computing Library
-----------------
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
(A list of authors and contributors can be found in the PDF manual)
License: MIT (X11), see file LICENSE in the base directory
============================================================================= */
/** @file atidlas/tools/timer.hpp
@brief A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling. */
#include <iostream>
#ifdef _WIN32
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
#undef min
#undef max
namespace viennacl
{
namespace tools
{
/** @brief Simple timer class based on gettimeofday (POSIX) or QueryPerformanceCounter (Windows).
*
* Avoids messing with Boost and should be sufficient for benchmarking purposes.
*/
class timer
{
public:
timer()
{
QueryPerformanceFrequency(&freq);
}
void start()
{
QueryPerformanceCounter((LARGE_INTEGER*) &start_time);
}
double get() const
{
LARGE_INTEGER elapsed;
QueryPerformanceCounter((LARGE_INTEGER*) &end_time);
elapsed.QuadPart = end_time.QuadPart - start_time.QuadPart;
return elapsed.QuadPart / static_cast<double>(freq.QuadPart);
}
private:
LARGE_INTEGER freq;
LARGE_INTEGER start_time;
LARGE_INTEGER end_time;
};
}
}
#else
#include <sys/time.h>
namespace viennacl
{
namespace tools
{
/** @brief Simple timer class based on gettimeofday (POSIX variant).
 *
 * Avoids messing with Boost and should be sufficient for benchmarking purposes.
 */
class timer
{
public:
  /** @brief Creates a timer whose reference point is 0 until start() is called. */
  timer() : ts(0)
  {}

  /** @brief Records the current time as the reference point for get(). */
  void start()
  {
    ts = now_in_microseconds();
  }

  /** @brief Returns the time in seconds elapsed since the last call to start(). */
  double get() const
  {
    return (now_in_microseconds() - ts) / 1000000.0;
  }

private:
  /** @brief Current gettimeofday() reading expressed in microseconds. */
  static double now_in_microseconds()
  {
    struct timeval tval;
    gettimeofday(&tval, NULL);
    // Convert to double *before* scaling: tv_sec * 1000000 in integer arithmetic overflows
    // on platforms where time_t is only 32 bits wide.
    return static_cast<double>(tval.tv_sec) * 1000000.0 + static_cast<double>(tval.tv_usec);
  }

  double ts;  // reference point in microseconds
};
}
}
#endif
#endif

43
atidlas/traits/size.hpp Normal file
View File

@@ -0,0 +1,43 @@
#ifndef ATIDLAS_TRAITS_SIZE_HPP_
#define ATIDLAS_TRAITS_SIZE_HPP_
#include "atidlas/forwards.h"
#include "atidlas/tools/predicate.hpp"
#include <vector>
namespace atidlas
{
namespace traits
{
/** @brief Number of entries on the k-th diagonal of a matrix, where k = proxy.rhs().
 *
 * Computed as min( min(size1, size1 + k), min(size2, size2 - k) ) in int arithmetic.
 */
template<typename LHS>
atidlas_int_t size(vector_expression<LHS, const int, op_matrix_diag> const & proxy)
{
  int const diag_offset = proxy.rhs();
  int const dim1 = static_cast<int>(size1(proxy.lhs()));
  int const dim2 = static_cast<int>(size2(proxy.lhs()));

  int const depth_along_dim1 = std::min(dim1, dim1 + diag_offset);
  int const depth_along_dim2 = std::min(dim2, dim2 - diag_offset);

  return atidlas_int_t(std::min(depth_along_dim1, depth_along_dim2));
}
/** @brief Length of a matrix row viewed as a vector: the second dimension (size2) of the underlying matrix. */
template<typename LHS>
atidlas_int_t size(vector_expression<LHS, const unsigned int, op_row> const & proxy)
{
  return size2(proxy.lhs());
}
/** @brief Length of a matrix column viewed as a vector: the first dimension (size1) of the underlying matrix. */
template<typename LHS>
atidlas_int_t size(vector_expression<LHS, const unsigned int, op_column> const & proxy)
{
  return size1(proxy.lhs());
}
/** @brief Size of a plain vector, as reported by its size() member. */
inline atidlas_int_t size(vector_base const & x)
{
  return x.size();
}
}
}
#endif

74
atidlas/vector.hpp Normal file
View File

@@ -0,0 +1,74 @@
#ifndef ATIDLAS_VECTOR_H
#define ATIDLAS_VECTOR_H
#include <cassert>
#include "atidlas/forwards.h"
#include "atidlas/scheduler/forwards.h"
#include "atidlas/expression_template.hpp"
namespace atidlas
{
class vector : public vector_base
{
public:
vector(atidlas_int_t size, numeric_type dtype, cl::Context context) : vector_base(size, dtype, context){}
template<typename T>
vector & operator=(T const & other)
{
vector_base::operator=(other);
return *this;
}
// using vector_base::operator+=;
// using vector_base::operator-=;
};
/* Generates the four operator+ overloads for TYPE (e.g. vector), each returning a
 * TYPE_expression<const X, const Y, OP> built from its two operands:
 *   expression + expression, expression + base, base + expression, base + base.
 *
 * - Every template parameter is deducible from the function arguments, so each overload can be
 *   selected through the operator syntax.
 * - The declared return type of each overload names the operands in the same order as the object returned.
 * - The non-template base + base overload is 'inline' because it is defined in a header.
 * - #TYPE is spliced into the assertion text so that the message names the actual type.
 */
#define ATIDLAS_ADD_BINARY_OPERATOR(TYPE, OP) \
template<typename XL, typename XR, typename XOP, \
         typename YL, typename YR, typename YOP> \
TYPE ## _expression< const TYPE ## _expression<XL, XR, XOP>, const TYPE ## _expression<YL, YR, YOP>, OP> \
operator + (TYPE ## _expression<XL, XR, XOP> const & x, TYPE ## _expression<YL, YR, YOP> const & y) \
{ \
  assert(x.size() == y.size() && bool("Incompatible " #TYPE " sizes!")); \
  return TYPE ## _expression< const TYPE ## _expression<XL, XR, XOP>, const TYPE ## _expression<YL, YR, YOP>, OP>(x, y); \
} \
\
template<typename XL, typename XR, typename XOP> \
TYPE ## _expression< const TYPE ## _expression<XL, XR, XOP>, const TYPE ## _base, OP> \
operator + (TYPE ## _expression<XL, XR, XOP> const & x, TYPE ## _base const & y) \
{ \
  assert(x.size() == y.size() && bool("Incompatible " #TYPE " sizes!")); \
  return TYPE ## _expression< const TYPE ## _expression<XL, XR, XOP>, const TYPE ## _base, OP>(x, y); \
} \
\
template<typename YL, typename YR, typename YOP> \
TYPE ## _expression< const TYPE ## _base, const TYPE ## _expression<YL, YR, YOP>, OP> \
operator + (TYPE ## _base const & x, TYPE ## _expression<YL, YR, YOP> const & y) \
{ \
  assert(x.size() == y.size() && bool("Incompatible " #TYPE " sizes!")); \
  return TYPE ## _expression< const TYPE ## _base, const TYPE ## _expression<YL, YR, YOP>, OP>(x, y); \
} \
\
inline TYPE ## _expression< const TYPE ## _base, const TYPE ## _base, OP> \
operator + (TYPE ## _base const & x, TYPE ## _base const & y) \
{ \
  assert(x.size() == y.size() && bool("Incompatible " #TYPE " sizes!")); \
  return TYPE ## _expression< const TYPE ## _base, const TYPE ## _base, OP>(x, y); \
}
ATIDLAS_ADD_BINARY_OPERATOR(vector, op_add)
#undef ATIDLAS_ADD_BINARY_OPERATOR
/** @brief Assignment from a vector expression.
 *
 * Builds the runtime statement "*this = operation" and returns this vector.
 * NOTE(review): the statement is only constructed here; nothing in this function executes it.
 */
template<class LHS, class RHS, class OP>
vector_base & vector_base::operator=(vector_expression<LHS, RHS, OP> const & operation)
{
  scheduler::statement assignment(*this, op_assign(), operation);
  return *this;
}
}
#endif