Removing ViennaCL dependency
This commit is contained in:
@@ -7,13 +7,7 @@
|
||||
#include <stdexcept>
|
||||
|
||||
#include "atidlas/tools/shared_ptr.hpp"
|
||||
|
||||
#include "viennacl/scheduler/io.hpp"
|
||||
#include "viennacl/ocl/forwards.h"
|
||||
#include "viennacl/scheduler/forwards.h"
|
||||
#include "viennacl/backend/mem_handle.hpp"
|
||||
#include "viennacl/device_specific/forwards.h"
|
||||
|
||||
#include "atidlas/scheduler/forwards.h"
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
@@ -50,22 +44,22 @@ struct atidlas_int_tuple
|
||||
std::string bound1;
|
||||
};
|
||||
|
||||
inline bool is_scalar_reduction(viennacl::scheduler::statement_node const & node)
|
||||
inline bool is_scalar_reduction(scheduler::statement_node const & node)
|
||||
{
|
||||
return node.op.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE || node.op.type_family==viennacl::scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY;
|
||||
return node.op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE || node.op.type_family==scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY;
|
||||
}
|
||||
|
||||
inline bool is_vector_reduction(viennacl::scheduler::statement_node const & node)
|
||||
inline bool is_vector_reduction(scheduler::statement_node const & node)
|
||||
{
|
||||
return node.op.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE
|
||||
|| node.op.type_family==viennacl::scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY
|
||||
|| node.op.type_family==viennacl::scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY;
|
||||
return node.op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE
|
||||
|| node.op.type_family==scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY
|
||||
|| node.op.type_family==scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY;
|
||||
}
|
||||
|
||||
inline viennacl::scheduler::statement_node const & lhs_most(viennacl::scheduler::statement::container_type const & array, size_t root)
|
||||
inline scheduler::statement_node const & lhs_most(scheduler::statement::container_type const & array, size_t root)
|
||||
{
|
||||
viennacl::scheduler::statement_node const * current = &array[root];
|
||||
while (current->lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
scheduler::statement_node const * current = &array[root];
|
||||
while (current->lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
current = &array[current->lhs.node_index];
|
||||
return *current;
|
||||
}
|
||||
@@ -162,38 +156,27 @@ namespace tools
|
||||
{
|
||||
|
||||
template<class Fun>
|
||||
inline void traverse(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, Fun const & fun, bool inspect);
|
||||
inline void traverse(scheduler::statement const & statement, atidlas_int_t root_idx, Fun const & fun, bool inspect);
|
||||
|
||||
inline void process(tools::kernel_generation_stream & stream, leaf_t leaf, std::multimap<std::string, std::string> const & accessors,
|
||||
viennacl::scheduler::statement const & statement, size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed);
|
||||
inline std::string evaluate(leaf_t leaf, std::map<std::string, std::string> const & accessors, viennacl::scheduler::statement const & statement, atidlas_int_t root_idx,mapping_type const & mapping);
|
||||
scheduler::statement const & statement, size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed);
|
||||
inline std::string evaluate(leaf_t leaf, std::map<std::string, std::string> const & accessors, scheduler::statement const & statement, atidlas_int_t root_idx,mapping_type const & mapping);
|
||||
}
|
||||
|
||||
using viennacl::scheduler::INT_TYPE;
|
||||
using viennacl::scheduler::UINT_TYPE;
|
||||
using viennacl::scheduler::ULONG_TYPE;
|
||||
using viennacl::scheduler::LONG_TYPE;
|
||||
using viennacl::scheduler::FLOAT_TYPE;
|
||||
using viennacl::scheduler::DOUBLE_TYPE;
|
||||
|
||||
typedef cl_uint vendor_id_type;
|
||||
typedef cl_device_type device_type;
|
||||
typedef std::string device_name_type;
|
||||
|
||||
class symbolic_binder
|
||||
{
|
||||
public:
|
||||
virtual ~symbolic_binder(){ }
|
||||
virtual bool bind(viennacl::backend::mem_handle const * ph) = 0;
|
||||
virtual unsigned int get(viennacl::backend::mem_handle const * ph) = 0;
|
||||
virtual bool bind(cl::Buffer const * ph) = 0;
|
||||
virtual unsigned int get(cl::Buffer const * ph) = 0;
|
||||
};
|
||||
|
||||
class bind_to_handle : public symbolic_binder
|
||||
{
|
||||
public:
|
||||
bind_to_handle() : current_arg_(0){ }
|
||||
bool bind(viennacl::backend::mem_handle const * ph) {return (ph==NULL)?true:memory.insert(std::make_pair((void*)ph, current_arg_)).second; }
|
||||
unsigned int get(viennacl::backend::mem_handle const * ph){ return bind(ph)?current_arg_++:memory.at((void*)ph); }
|
||||
bool bind(cl::Buffer const * ph) {return (ph==NULL)?true:memory.insert(std::make_pair((void*)ph, current_arg_)).second; }
|
||||
unsigned int get(cl::Buffer const * ph){ return bind(ph)?current_arg_++:memory.at((void*)ph); }
|
||||
private:
|
||||
unsigned int current_arg_;
|
||||
std::map<void*,unsigned int> memory;
|
||||
@@ -203,8 +186,8 @@ class bind_all_unique : public symbolic_binder
|
||||
{
|
||||
public:
|
||||
bind_all_unique() : current_arg_(0){ }
|
||||
bool bind(viennacl::backend::mem_handle const *) {return true; }
|
||||
unsigned int get(viennacl::backend::mem_handle const *){ return current_arg_++; }
|
||||
bool bind(cl::Buffer const *) {return true; }
|
||||
unsigned int get(cl::Buffer const *){ return current_arg_++; }
|
||||
private:
|
||||
unsigned int current_arg_;
|
||||
std::map<void*,unsigned int> memory;
|
||||
@@ -226,8 +209,6 @@ inline tools::shared_ptr<symbolic_binder> make_binder(binding_policy_t policy)
|
||||
template<char C>
|
||||
struct char_to_type{ };
|
||||
|
||||
typedef viennacl::device_specific::statements_container statements_container;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -3,9 +3,9 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "viennacl/scheduler/forwards.h"
|
||||
|
||||
#include "atidlas/forwards.h"
|
||||
#include "atidlas/scheduler/forwards.h"
|
||||
#include "atidlas/tools/find_and_replace.hpp"
|
||||
#include "atidlas/backend/tools/misc.hpp"
|
||||
|
||||
@@ -63,10 +63,10 @@ protected:
|
||||
public:
|
||||
struct node_info
|
||||
{
|
||||
node_info(mapping_type const * _mapping, viennacl::scheduler::statement const * _statement, atidlas_int_t _root_idx) :
|
||||
node_info(mapping_type const * _mapping, scheduler::statement const * _statement, atidlas_int_t _root_idx) :
|
||||
mapping(_mapping), statement(_statement), root_idx(_root_idx) { }
|
||||
mapping_type const * mapping;
|
||||
viennacl::scheduler::statement const * statement;
|
||||
scheduler::statement const * statement;
|
||||
atidlas_int_t root_idx;
|
||||
};
|
||||
|
||||
@@ -153,16 +153,16 @@ public:
|
||||
mapped_reduction(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key) : mapped_object(scalartype, id, type_key), binary_leaf(info){ }
|
||||
|
||||
atidlas_int_t root_idx() const { return info_.root_idx; }
|
||||
viennacl::scheduler::statement const & statement() const { return *info_.statement; }
|
||||
viennacl::scheduler::statement_node root_node() const { return statement().array()[root_idx()]; }
|
||||
scheduler::statement const & statement() const { return *info_.statement; }
|
||||
scheduler::statement_node root_node() const { return statement().array()[root_idx()]; }
|
||||
bool is_index_reduction() const { return tools::is_index_reduction(info_.statement->array()[info_.root_idx].op); }
|
||||
|
||||
viennacl::scheduler::op_element root_op() const
|
||||
scheduler::op_element root_op() const
|
||||
{
|
||||
viennacl::scheduler::op_element res = info_.statement->array()[info_.root_idx].op;
|
||||
if (res.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE
|
||||
||res.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE)
|
||||
res.type = viennacl::scheduler::OPERATION_BINARY_ADD_TYPE;
|
||||
scheduler::op_element res = info_.statement->array()[info_.root_idx].op;
|
||||
if (res.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE
|
||||
||res.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE)
|
||||
res.type = scheduler::OPERATION_BINARY_ADD_TYPE;
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
@@ -5,9 +5,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "atidlas/backend/templates/template_base.hpp"
|
||||
|
||||
#include "viennacl/scheduler/forwards.h"
|
||||
#include "viennacl/tools/tools.hpp"
|
||||
#include "atidlas/scheduler/forwards.h"
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
@@ -28,7 +26,7 @@ public:
|
||||
class matrix_axpy_template : public template_base_impl<matrix_axpy_template, matrix_axpy_parameters>
|
||||
{
|
||||
private:
|
||||
int check_invalid_impl(viennacl::ocl::device const &, statements_container const &) const
|
||||
int check_invalid_impl(cl::Device const &, statements_container const &) const
|
||||
{
|
||||
if (p_.simd_width>1)
|
||||
return TEMPLATE_INVALID_SIMD_WIDTH;
|
||||
@@ -99,14 +97,14 @@ public:
|
||||
|
||||
std::vector<atidlas_int_t> input_sizes(statements_container const & statements)
|
||||
{
|
||||
viennacl::scheduler::statement const & statement = statements.data().front();
|
||||
scheduler::statement const & statement = statements.data().front();
|
||||
std::pair<atidlas_int_t, atidlas_int_t> size = matrix_size(lhs_most(statement.array(), statement.root()), up_to_internal_size_);
|
||||
return tools::make_vector<atidlas_int_t>() << size.first << size.second;
|
||||
}
|
||||
|
||||
void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, statements_container const & statements)
|
||||
{
|
||||
viennacl::ocl::kernel & kernel = programs[0].program().get_kernel(kernel_prefix);
|
||||
cl::Kernel & kernel = programs[0].program().get_kernel(kernel_prefix);
|
||||
|
||||
kernel.local_work_size(0, p_.local_size_0);
|
||||
kernel.local_work_size(1, p_.local_size_1);
|
||||
@@ -119,7 +117,7 @@ public:
|
||||
kernel.arg(current_arg++, cl_uint(MN[1]));
|
||||
set_arguments(statements, kernel, current_arg);
|
||||
|
||||
viennacl::ocl::enqueue(kernel);
|
||||
// viennacl::ocl::enqueue(kernel);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -4,14 +4,9 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "viennacl/scheduler/forwards.h"
|
||||
|
||||
#include "viennacl/matrix_def.hpp"
|
||||
#include "viennacl/matrix_proxy.hpp"
|
||||
#include "viennacl/forwards.h"
|
||||
#include "viennacl/tools/tools.hpp"
|
||||
|
||||
#include "atidlas/forwards.h"
|
||||
#include "atidlas/backend/templates/template_base.hpp"
|
||||
#include "atidlas/scheduler/forwards.h"
|
||||
#include "atidlas/tools/align.hpp"
|
||||
|
||||
namespace atidlas
|
||||
@@ -50,8 +45,8 @@ class matrix_product_template : public template_base_impl<matrix_product_templat
|
||||
private:
|
||||
unsigned int lmem_usage(statements_container const & statements) const
|
||||
{
|
||||
viennacl::scheduler::statement const & statement = statements.data().front();
|
||||
viennacl::scheduler::statement_node_numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type;
|
||||
scheduler::statement const & statement = statements.data().front();
|
||||
scheduler::numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type;
|
||||
|
||||
unsigned int N = 0;
|
||||
if (p_.A_fetching_policy==FETCH_FROM_LOCAL)
|
||||
@@ -63,14 +58,14 @@ private:
|
||||
|
||||
unsigned int registers_usage(statements_container const & statements) const
|
||||
{
|
||||
viennacl::scheduler::statement const & statement = statements.data().front();
|
||||
viennacl::scheduler::statement_node_numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type;
|
||||
scheduler::statement const & statement = statements.data().front();
|
||||
scheduler::numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type;
|
||||
|
||||
unsigned int N = p_.mS * p_.nS + p_.mS * p_.kS + p_.kS * p_.nS;
|
||||
return N*tools::size_of(numeric_type);
|
||||
}
|
||||
|
||||
int check_invalid_impl(viennacl::ocl::device const &, statements_container const &) const
|
||||
int check_invalid_impl(cl::Device const &, statements_container const &) const
|
||||
{
|
||||
if (p_.A_fetching_policy!=FETCH_FROM_LOCAL && p_.B_fetching_policy!=FETCH_FROM_LOCAL&& (p_.local_fetch_0!=0 || p_.local_fetch_1!=0))
|
||||
return TEMPLATE_GLOBAL_MEMORY_REQUIRES_ZERO_LOCAL_FETCH;
|
||||
@@ -118,15 +113,15 @@ private:
|
||||
return TEMPLATE_VALID;
|
||||
}
|
||||
|
||||
static void parse(viennacl::scheduler::statement const & s,
|
||||
static void parse(scheduler::statement const & s,
|
||||
atidlas_int_t & C_idx, leaf_t & C_leaf, atidlas_int_t & alpha_idx, leaf_t & alpha_leaf,
|
||||
atidlas_int_t & A_idx, leaf_t & A_leaf, bool& A_trans, atidlas_int_t & B_idx, leaf_t & B_leaf, bool& B_trans,
|
||||
atidlas_int_t & beta_idx, leaf_t & beta_leaf)
|
||||
{
|
||||
using namespace tools;
|
||||
using namespace viennacl::scheduler;
|
||||
using namespace scheduler;
|
||||
|
||||
viennacl::scheduler::statement::container_type const & array = s.array();
|
||||
scheduler::statement::container_type const & array = s.array();
|
||||
atidlas_int_t root_idx = s.root();
|
||||
|
||||
C_idx = root_idx;
|
||||
@@ -206,7 +201,7 @@ private:
|
||||
/// INIT
|
||||
/// //////////////
|
||||
tools::kernel_generation_stream stream;
|
||||
viennacl::scheduler::statement const & st = statements.data().front();
|
||||
scheduler::statement const & st = statements.data().front();
|
||||
mapping_type const & mapping = mappings.front();
|
||||
|
||||
bool A_trans = false, B_trans = false;
|
||||
@@ -658,23 +653,23 @@ private:
|
||||
}
|
||||
|
||||
template<class NumericT>
|
||||
void enqueue_block(viennacl::scheduler::statement & statement, atidlas_int_t M, atidlas_int_t N, atidlas_int_t K,
|
||||
viennacl::scheduler::lhs_rhs_element& eA, viennacl::scheduler::lhs_rhs_element& eB, viennacl::scheduler::lhs_rhs_element& eC, viennacl::scheduler::lhs_rhs_element& ebeta,
|
||||
void enqueue_block(scheduler::statement & statement, atidlas_int_t M, atidlas_int_t N, atidlas_int_t K,
|
||||
scheduler::lhs_rhs_element& eA, scheduler::lhs_rhs_element& eB, scheduler::lhs_rhs_element& eC, scheduler::lhs_rhs_element& ebeta,
|
||||
viennacl::matrix_base<NumericT> const & A, viennacl::matrix_base<NumericT> const & B, viennacl::matrix_base<NumericT> const & C, NumericT beta,
|
||||
std::vector<lazy_program_compiler> & programs, std::string const & kernel_prefix, int id)
|
||||
{
|
||||
if (A.size1()==0 || A.size2()==0 || B.size1()==0 || B.size2()==0 || C.size1()==0 || C.size2()==0)
|
||||
return;
|
||||
|
||||
viennacl::ocl::kernel& kernel = programs[id].program().get_kernel(kernel_prefix);
|
||||
cl::Kernel& kernel = programs[id].program().get_kernel(kernel_prefix);
|
||||
|
||||
kernel.local_work_size(0, p_.local_size_0);
|
||||
kernel.local_work_size(1, p_.local_size_1);
|
||||
|
||||
viennacl::scheduler::statement::assign_element(eA, A);
|
||||
viennacl::scheduler::statement::assign_element(eB, B);
|
||||
viennacl::scheduler::statement::assign_element(eC, C);
|
||||
viennacl::scheduler::statement::assign_element(ebeta, beta);
|
||||
scheduler::statement::assign_element(eA, A);
|
||||
scheduler::statement::assign_element(eB, B);
|
||||
scheduler::statement::assign_element(eC, C);
|
||||
scheduler::statement::assign_element(ebeta, beta);
|
||||
|
||||
if (id==1)
|
||||
{
|
||||
@@ -691,12 +686,12 @@ private:
|
||||
kernel.arg(current_arg++, cl_uint(N));
|
||||
kernel.arg(current_arg++, cl_uint(K));
|
||||
set_arguments(statement, kernel, current_arg);
|
||||
viennacl::ocl::enqueue(kernel);
|
||||
// viennacl::ocl::enqueue(kernel);
|
||||
|
||||
}
|
||||
|
||||
template<class NumericT>
|
||||
viennacl::matrix_slice< viennacl::matrix_base<NumericT> > create_slice(viennacl::matrix_base<NumericT>* viennacl::scheduler::lhs_rhs_element::*ptr, viennacl::scheduler::lhs_rhs_element const & element,
|
||||
viennacl::matrix_slice< viennacl::matrix_base<NumericT> > create_slice(viennacl::matrix_base<NumericT>* scheduler::lhs_rhs_element::*ptr, scheduler::lhs_rhs_element const & element,
|
||||
atidlas_int_t s0_0, atidlas_int_t s0_1, atidlas_int_t s1_0, atidlas_int_t s1_1, bool swap)
|
||||
{
|
||||
viennacl::matrix_base<NumericT> & M = *(element.*ptr);
|
||||
@@ -708,20 +703,20 @@ private:
|
||||
}
|
||||
|
||||
template<class NumericT>
|
||||
void enqueue_impl(viennacl::matrix_base<NumericT>* viennacl::scheduler::lhs_rhs_element::*ptr_matrix,
|
||||
viennacl::scheduler::statement & statement, atidlas_int_t M, atidlas_int_t N, atidlas_int_t K,
|
||||
viennacl::scheduler::lhs_rhs_element & A, viennacl::scheduler::lhs_rhs_element & B, viennacl::scheduler::lhs_rhs_element & C, viennacl::scheduler::lhs_rhs_element & beta,
|
||||
void enqueue_impl(viennacl::matrix_base<NumericT>* scheduler::lhs_rhs_element::*ptr_matrix,
|
||||
scheduler::statement & statement, atidlas_int_t M, atidlas_int_t N, atidlas_int_t K,
|
||||
scheduler::lhs_rhs_element & A, scheduler::lhs_rhs_element & B, scheduler::lhs_rhs_element & C, scheduler::lhs_rhs_element & beta,
|
||||
NumericT beta_value, std::vector<lazy_program_compiler> & programs, std::string const & kernel_prefix)
|
||||
{
|
||||
using namespace tools;
|
||||
|
||||
std::string kernel_prefix_fb = kernel_prefix + "_fb";
|
||||
|
||||
atidlas_int_t ldstrideA = call_on_matrix(A, leading_stride_fun());
|
||||
atidlas_int_t ldstrideB = call_on_matrix(B, leading_stride_fun());
|
||||
atidlas_int_t ldstrideC = call_on_matrix(C, leading_stride_fun());
|
||||
atidlas_int_t ldstartA = call_on_matrix(A, leading_start_fun());
|
||||
atidlas_int_t ldstartB = call_on_matrix(B, leading_start_fun());
|
||||
atidlas_int_t ldstrideA = traits::ldstride(*A.matrix);
|
||||
atidlas_int_t ldstrideB = traits::ldstride(*B.matrix);
|
||||
atidlas_int_t ldstrideC = traits::ldstride(*C.matrix);
|
||||
atidlas_int_t ldstartA = traits::ldstart(*A.matrix);
|
||||
atidlas_int_t ldstartB = traits::ldstart(*B.matrix);
|
||||
bool swap_A = (A_trans_=='T');
|
||||
bool swap_B = (B_trans_=='T');
|
||||
|
||||
@@ -735,9 +730,9 @@ private:
|
||||
}
|
||||
|
||||
|
||||
viennacl::scheduler::lhs_rhs_element Acopy = A;
|
||||
viennacl::scheduler::lhs_rhs_element Bcopy = B;
|
||||
viennacl::scheduler::lhs_rhs_element Ccopy = C;
|
||||
scheduler::lhs_rhs_element Acopy = A;
|
||||
scheduler::lhs_rhs_element Bcopy = B;
|
||||
scheduler::lhs_rhs_element Ccopy = C;
|
||||
|
||||
atidlas_int_t lM = M / p_.mL * p_.mL;
|
||||
atidlas_int_t lN = N / p_.nL * p_.nL;
|
||||
@@ -764,14 +759,14 @@ private:
|
||||
{
|
||||
using namespace tools;
|
||||
|
||||
viennacl::scheduler::statement const & st = statements.data().front();
|
||||
scheduler::statement const & st = statements.data().front();
|
||||
parse(st, C_idx, C_leaf, alpha_idx, alpha_leaf, A_idx, A_leaf, A_trans, B_idx, B_leaf, B_trans, beta_idx, beta_leaf);
|
||||
viennacl::scheduler::lhs_rhs_element const & A = tools::lhs_rhs_element(st, A_idx, A_leaf);
|
||||
viennacl::scheduler::lhs_rhs_element const & C = tools::lhs_rhs_element(st, C_idx, C_leaf);
|
||||
scheduler::lhs_rhs_element const & A = tools::lhs_rhs_element(st, A_idx, A_leaf);
|
||||
scheduler::lhs_rhs_element const & C = tools::lhs_rhs_element(st, C_idx, C_leaf);
|
||||
|
||||
atidlas_int_t M = call_on_matrix(C, size1_fun());
|
||||
atidlas_int_t N = call_on_matrix(C, size2_fun());
|
||||
atidlas_int_t K = A_trans?call_on_matrix(A, size1_fun()):call_on_matrix(A, size2_fun());
|
||||
atidlas_int_t M = traits::size1(*C.matrix);
|
||||
atidlas_int_t N = traits::size2(*C.matrix);
|
||||
atidlas_int_t K = A_trans?traits::size1(*A.matrix):traits::size2(*A.matrix);
|
||||
|
||||
return tools::make_vector<atidlas_int_t>() << M << N << K;
|
||||
}
|
||||
@@ -796,16 +791,16 @@ public:
|
||||
leaf_t C_leaf=LHS_NODE_TYPE, A_leaf=LHS_NODE_TYPE, B_leaf=LHS_NODE_TYPE, alpha_leaf=LHS_NODE_TYPE, beta_leaf=LHS_NODE_TYPE;
|
||||
std::vector<atidlas_int_t> MNK = infos(statements,A_trans,B_trans,C_idx,A_idx,B_idx,alpha_idx,beta_idx,C_leaf,A_leaf,B_leaf,alpha_leaf,beta_leaf);
|
||||
|
||||
viennacl::scheduler::statement stcopy = statements.data().front();
|
||||
viennacl::scheduler::lhs_rhs_element& A = tools::lhs_rhs_element(stcopy, A_idx, A_leaf);
|
||||
viennacl::scheduler::lhs_rhs_element& B = tools::lhs_rhs_element(stcopy, B_idx, B_leaf);
|
||||
viennacl::scheduler::lhs_rhs_element& C = tools::lhs_rhs_element(stcopy, C_idx, C_leaf);
|
||||
viennacl::scheduler::lhs_rhs_element& beta = tools::lhs_rhs_element(stcopy, beta_idx, beta_leaf);
|
||||
scheduler::statement stcopy = statements.data().front();
|
||||
scheduler::lhs_rhs_element& A = tools::lhs_rhs_element(stcopy, A_idx, A_leaf);
|
||||
scheduler::lhs_rhs_element& B = tools::lhs_rhs_element(stcopy, B_idx, B_leaf);
|
||||
scheduler::lhs_rhs_element& C = tools::lhs_rhs_element(stcopy, C_idx, C_leaf);
|
||||
scheduler::lhs_rhs_element& beta = tools::lhs_rhs_element(stcopy, beta_idx, beta_leaf);
|
||||
|
||||
if (C.numeric_type==viennacl::scheduler::FLOAT_TYPE)
|
||||
enqueue_impl<float>(&viennacl::scheduler::lhs_rhs_element::matrix_float, stcopy, MNK[0], MNK[1], MNK[2], A, B, C, beta, beta.host_float, programs, kernel_prefix);
|
||||
else if (C.numeric_type==viennacl::scheduler::DOUBLE_TYPE)
|
||||
enqueue_impl<double>(&viennacl::scheduler::lhs_rhs_element::matrix_double, stcopy, MNK[0], MNK[1], MNK[2], A, B, C, beta, beta.host_double, programs, kernel_prefix);
|
||||
if (C.numeric_type==scheduler::FLOAT_TYPE)
|
||||
enqueue_impl<float>(&scheduler::lhs_rhs_element::matrix_float, stcopy, MNK[0], MNK[1], MNK[2], A, B, C, beta, beta.host_float, programs, kernel_prefix);
|
||||
else if (C.numeric_type==scheduler::DOUBLE_TYPE)
|
||||
enqueue_impl<double>(&scheduler::lhs_rhs_element::matrix_double, stcopy, MNK[0], MNK[1], MNK[2], A, B, C, beta, beta.host_double, programs, kernel_prefix);
|
||||
else
|
||||
throw generator_not_supported_exception("GEMM only supported for float/double");
|
||||
|
||||
|
@@ -4,10 +4,7 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "viennacl/backend/opencl.hpp"
|
||||
#include "viennacl/scheduler/forwards.h"
|
||||
#include "viennacl/tools/tools.hpp"
|
||||
|
||||
#include "atidlas/scheduler/forwards.h"
|
||||
#include "atidlas/backend/templates/template_base.hpp"
|
||||
|
||||
namespace atidlas
|
||||
@@ -33,8 +30,8 @@ private:
|
||||
unsigned int res = 0;
|
||||
for(statements_container::data_type::const_iterator it = statements.data().begin() ; it != statements.data().end() ; ++it)
|
||||
{
|
||||
viennacl::scheduler::statement const & statement = statements.data().front();
|
||||
viennacl::scheduler::statement_node_numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type;
|
||||
scheduler::statement const & statement = statements.data().front();
|
||||
scheduler::numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type;
|
||||
res += p_.local_size_0*tools::size_of(numeric_type);
|
||||
}
|
||||
return res;
|
||||
@@ -161,7 +158,7 @@ private:
|
||||
accessors["matrix_diag"] = str[a];
|
||||
accessors["scalar"] = "#namereg";
|
||||
std::string value = exprs[k]->evaluate_recursive(LHS_NODE_TYPE, accessors);
|
||||
if (exprs[k]->root_node().op.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE)
|
||||
if (exprs[k]->root_node().op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE)
|
||||
value+= "*" + exprs[k]->evaluate_recursive(RHS_NODE_TYPE, accessors);
|
||||
|
||||
if (exprs[k]->is_index_reduction())
|
||||
@@ -294,7 +291,7 @@ public:
|
||||
{
|
||||
std::vector<atidlas_int_t> size = input_sizes(statements);
|
||||
|
||||
std::vector<viennacl::scheduler::statement_node const *> reductions;
|
||||
std::vector<scheduler::statement_node const *> reductions;
|
||||
for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
|
||||
{
|
||||
std::vector<size_t> reductions_idx = tools::filter_nodes(&tools::is_reduction, *it, false);
|
||||
@@ -302,7 +299,7 @@ public:
|
||||
reductions.push_back(&it->array()[*itt]);
|
||||
}
|
||||
|
||||
viennacl::scheduler::statement const & statement = statements.data().front();
|
||||
scheduler::statement const & statement = statements.data().front();
|
||||
unsigned int scalartype_size = tools::size_of(lhs_most(statement.array(), statement.root()).lhs.numeric_type);
|
||||
|
||||
viennacl::ocl::kernel * kernels[2];
|
||||
@@ -329,7 +326,7 @@ public:
|
||||
kernels[k]->arg(n_arg++, cl_uint(size[0]));
|
||||
unsigned int i = 0;
|
||||
unsigned int j = 0;
|
||||
for (std::vector<viennacl::scheduler::statement_node const *>::const_iterator it = reductions.begin(); it != reductions.end(); ++it)
|
||||
for (std::vector<scheduler::statement_node const *>::const_iterator it = reductions.begin(); it != reductions.end(); ++it)
|
||||
{
|
||||
if (tools::is_index_reduction((*it)->op))
|
||||
{
|
||||
|
@@ -4,10 +4,8 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "viennacl/scheduler/forwards.h"
|
||||
#include "viennacl/tools/tools.hpp"
|
||||
#include "viennacl/scheduler/io.hpp"
|
||||
|
||||
#include "atidlas/scheduler/forwards.h"
|
||||
#include "atidlas/traits/size.hpp"
|
||||
#include "atidlas/backend/templates/template_base.hpp"
|
||||
|
||||
namespace atidlas
|
||||
@@ -27,7 +25,7 @@ struct row_wise_reduction_parameters : public template_base::parameters_type
|
||||
class row_wise_reduction_template : public template_base_impl<row_wise_reduction_template, row_wise_reduction_parameters>
|
||||
{
|
||||
private:
|
||||
virtual int check_invalid_impl(viennacl::ocl::device const &, statements_container const &) const
|
||||
virtual int check_invalid_impl(cl::Device const &, statements_container const &) const
|
||||
{
|
||||
if (p_.fetch_policy==FETCH_FROM_LOCAL)
|
||||
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
|
||||
@@ -39,7 +37,7 @@ private:
|
||||
return p_.local_size_0*(p_.local_size_1+1);
|
||||
}
|
||||
|
||||
static void parse(viennacl::scheduler::statement const & statement, std::vector<size_t> & idx, bool & is_trans, viennacl::scheduler::lhs_rhs_element & matrix)
|
||||
static void parse(scheduler::statement const & statement, std::vector<size_t> & idx, bool & is_trans, scheduler::lhs_rhs_element & matrix)
|
||||
{
|
||||
idx = tools::filter_nodes(&tools::is_reduction, statement, false);
|
||||
is_trans = is_node_trans(statement.array(), idx[0], LHS_NODE_TYPE);
|
||||
@@ -125,7 +123,7 @@ private:
|
||||
accessors["vector"] = str[a];
|
||||
accessors["scalar"] = "#namereg";
|
||||
std::string value = exprs[k]->evaluate_recursive(LHS_NODE_TYPE, accessors);
|
||||
if (exprs[k]->root_node().op.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE)
|
||||
if (exprs[k]->root_node().op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE)
|
||||
value+= "*" + exprs[k]->evaluate_recursive(RHS_NODE_TYPE, accessors);
|
||||
|
||||
if (exprs[k]->is_index_reduction())
|
||||
@@ -201,7 +199,7 @@ private:
|
||||
for (mit = mappings.begin(), sit = statements.data().begin(); mit != mappings.end(); ++mit, ++sit)
|
||||
{
|
||||
std::vector<size_t> idx;
|
||||
viennacl::scheduler::lhs_rhs_element A;
|
||||
scheduler::lhs_rhs_element A;
|
||||
parse(*sit, idx, is_trans, A);
|
||||
for (unsigned int j = 0; j < idx.size(); ++j)
|
||||
exprs.push_back((mapped_row_wise_reduction*)(mit->at(mapping_key(idx[j], PARENT_NODE_TYPE)).get()));
|
||||
@@ -222,10 +220,10 @@ private:
|
||||
std::vector<atidlas_int_t> infos(statements_container const & statements, bool & is_trans)
|
||||
{
|
||||
std::vector<size_t> idx;
|
||||
viennacl::scheduler::lhs_rhs_element A;
|
||||
scheduler::lhs_rhs_element A;
|
||||
parse(statements.data().front(), idx, is_trans, A);
|
||||
atidlas_int_t M = tools::call_on_matrix(A, tools::size1_fun());
|
||||
atidlas_int_t N = tools::call_on_matrix(A, tools::size2_fun());
|
||||
atidlas_int_t M = traits::size1(*A.matrix);
|
||||
atidlas_int_t N = traits::size2(*A.matrix);
|
||||
if(is_trans)
|
||||
std::swap(M,N);
|
||||
return tools::make_vector<atidlas_int_t>() << M << N;
|
||||
@@ -245,7 +243,7 @@ public:
|
||||
bool is_trans;
|
||||
std::vector<atidlas_int_t> MN = infos(statements, is_trans);
|
||||
|
||||
viennacl::ocl::kernel * kernel;
|
||||
cl::Kernel * kernel;
|
||||
if(is_trans && p_.simd_width>1)
|
||||
{
|
||||
if (has_strided_access(statements))
|
||||
@@ -264,7 +262,7 @@ public:
|
||||
kernel->arg(current_arg++, cl_uint(MN[0]));
|
||||
kernel->arg(current_arg++, cl_uint(MN[1]));
|
||||
set_arguments(statements, *kernel, current_arg);
|
||||
viennacl::ocl::enqueue(*kernel);
|
||||
// cl::CommandQueue().enqueue()
|
||||
}
|
||||
};
|
||||
|
||||
|
@@ -5,13 +5,7 @@
|
||||
#include <list>
|
||||
#include <set>
|
||||
|
||||
#include "viennacl/ocl/kernel.hpp"
|
||||
#include "viennacl/ocl/device.hpp"
|
||||
#include "viennacl/ocl/device_utils.hpp"
|
||||
|
||||
#include "viennacl/scheduler/forwards.h"
|
||||
#include "viennacl/scheduler/io.hpp"
|
||||
|
||||
#include "atidlas/scheduler/forwards.h"
|
||||
#include "atidlas/tools/lazy_program_compiler.hpp"
|
||||
#include "atidlas/backend/templates/template_base.hpp"
|
||||
#include "atidlas/backend/tools/misc.hpp"
|
||||
@@ -45,94 +39,95 @@ private:
|
||||
/** @brief Functor to map the statements to the types defined in mapped_objects.hpp */
|
||||
class map_functor : public tools::traversal_functor
|
||||
{
|
||||
|
||||
viennacl::scheduler::statement_node_numeric_type numeric_type(viennacl::scheduler::statement const * statement, atidlas_int_t root_idx) const
|
||||
numeric_type get_numeric_type(scheduler::statement const * statement, atidlas_int_t root_idx) const
|
||||
{
|
||||
viennacl::scheduler::statement_node const * root_node = &statement->array()[root_idx];
|
||||
while (root_node->lhs.numeric_type==viennacl::scheduler::INVALID_NUMERIC_TYPE)
|
||||
scheduler::statement_node const * root_node = &statement->array()[root_idx];
|
||||
while (root_node->lhs.numeric_t==INVALID_NUMERIC_TYPE)
|
||||
root_node = &statement->array()[root_node->lhs.node_index];
|
||||
return root_node->lhs.numeric_type;
|
||||
return root_node->lhs.numeric_t;
|
||||
}
|
||||
|
||||
public:
|
||||
typedef tools::shared_ptr<mapped_object> result_type;
|
||||
|
||||
map_functor(symbolic_binder & binder, mapping_type & mapping) : binder_(binder), mapping_(mapping){ }
|
||||
|
||||
/** @brief Binary leaf */
|
||||
template<class T>
|
||||
result_type binary_leaf(viennacl::scheduler::statement const * statement, atidlas_int_t root_idx, mapping_type const * mapping) const
|
||||
tools::shared_ptr<mapped_object> binary_leaf(scheduler::statement const * statement, atidlas_int_t root_idx, mapping_type const * mapping) const
|
||||
{
|
||||
return result_type(new T(tools::numeric_type_to_string(numeric_type(statement,root_idx)), binder_.get(NULL), mapped_object::node_info(mapping, statement, root_idx)));
|
||||
return tools::shared_ptr<mapped_object>(new T(tools::numeric_type_to_string(get_numeric_type(statement,root_idx)), binder_.get(NULL), mapped_object::node_info(mapping, statement, root_idx)));
|
||||
}
|
||||
|
||||
template<class NumericT>
|
||||
result_type operator()(NumericT const & /*scalar*/) const
|
||||
{
|
||||
return result_type(new mapped_host_scalar(tools::type_to_string<NumericT>::value(), binder_.get(NULL)));
|
||||
}
|
||||
// template<class NumericT>
|
||||
// tools::shared_ptr<mapped_object> operator()(NumericT const & /*scalar*/) const
|
||||
// {
|
||||
// return tools::shared_ptr<mapped_object>(new mapped_host_scalar(tools::type_to_string<NumericT>::value(), binder_.get(NULL)));
|
||||
// }
|
||||
|
||||
/** @brief Scalar mapping */
|
||||
template<class NumericT>
|
||||
result_type operator()(viennacl::scalar<NumericT> const & scal) const
|
||||
{
|
||||
return result_type(new mapped_scalar(tools::type_to_string<NumericT>::value(), binder_.get(&viennacl::traits::handle(scal))));
|
||||
}
|
||||
// /** @brief Scalar mapping */
|
||||
// template<class NumericT>
|
||||
// tools::shared_ptr<mapped_object> operator()(viennacl::scalar<NumericT> const & scal) const
|
||||
// {
|
||||
// return tools::shared_ptr<mapped_object>(new mapped_scalar(tools::type_to_string<NumericT>::value(), binder_.get(&viennacl::traits::handle(scal))));
|
||||
// }
|
||||
|
||||
/** @brief Vector mapping */
|
||||
template<class NumericT>
|
||||
result_type operator()(viennacl::vector_base<NumericT> const & vec) const
|
||||
tools::shared_ptr<mapped_object> create_vector(vector_base const & vector) const
|
||||
{ return tools::shared_ptr<mapped_object>(new mapped_vector(tools::numeric_type_to_string(vector.dtype()), binder_.get(&vector.data()))); }
|
||||
|
||||
// /** @brief Implicit vector mapping */
|
||||
// template<class NumericT>
|
||||
// tools::shared_ptr<mapped_object> operator()(viennacl::implicit_vector_base<NumericT> const & /*vec*/) const
|
||||
// {
|
||||
// return tools::shared_ptr<mapped_object>(new mapped_implicit_vector(tools::type_to_string<NumericT>::value(), binder_.get(NULL)));
|
||||
// }
|
||||
|
||||
// /** @brief Matrix mapping */
|
||||
// template<class NumericT>
|
||||
// tools::shared_ptr<mapped_object> operator()(viennacl::matrix_base<NumericT> const & mat) const
|
||||
// {
|
||||
// return tools::shared_ptr<mapped_object>(new mapped_matrix(tools::type_to_string<NumericT>::value(), binder_.get(&viennacl::traits::handle(mat))));
|
||||
// }
|
||||
|
||||
// /** @brief Implicit matrix mapping */
|
||||
// template<class NumericT>
|
||||
// tools::shared_ptr<mapped_object> operator()(viennacl::implicit_matrix_base<NumericT> const & /*mat*/) const
|
||||
// {
|
||||
// return tools::shared_ptr<mapped_object>(new mapped_implicit_matrix(tools::type_to_string<NumericT>::value(), binder_.get(NULL)));
|
||||
// }
|
||||
|
||||
tools::shared_ptr<mapped_object> create(scheduler::lhs_rhs_element const & lhs_rhs) const
|
||||
{
|
||||
return result_type(new mapped_vector(tools::type_to_string<NumericT>::value(), binder_.get(&viennacl::traits::handle(vec))));
|
||||
// if(lhs_rhs.subtype==scheduler::DENSE_VECTOR_TYPE)
|
||||
return create_vector(*lhs_rhs.vector);
|
||||
}
|
||||
|
||||
/** @brief Implicit vector mapping */
|
||||
template<class NumericT>
|
||||
result_type operator()(viennacl::implicit_vector_base<NumericT> const & /*vec*/) const
|
||||
{
|
||||
return result_type(new mapped_implicit_vector(tools::type_to_string<NumericT>::value(), binder_.get(NULL)));
|
||||
}
|
||||
public:
|
||||
|
||||
/** @brief Matrix mapping */
|
||||
template<class NumericT>
|
||||
result_type operator()(viennacl::matrix_base<NumericT> const & mat) const
|
||||
{
|
||||
return result_type(new mapped_matrix(tools::type_to_string<NumericT>::value(), binder_.get(&viennacl::traits::handle(mat))));
|
||||
}
|
||||
|
||||
/** @brief Implicit matrix mapping */
|
||||
template<class NumericT>
|
||||
result_type operator()(viennacl::implicit_matrix_base<NumericT> const & /*mat*/) const
|
||||
{
|
||||
return result_type(new mapped_implicit_matrix(tools::type_to_string<NumericT>::value(), binder_.get(NULL)));
|
||||
}
|
||||
map_functor(symbolic_binder & binder, mapping_type & mapping) : binder_(binder), mapping_(mapping){ }
|
||||
|
||||
/** @brief Traversal functor */
|
||||
void operator()(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const {
|
||||
void operator()(scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const {
|
||||
mapping_type::key_type key(root_idx, leaf_t);
|
||||
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
|
||||
if (leaf_t == LHS_NODE_TYPE && root_node.lhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
mapping_.insert(mapping_type::value_type(key, tools::call_on_element(root_node.lhs, *this)));
|
||||
else if (leaf_t == RHS_NODE_TYPE && root_node.rhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
mapping_.insert(mapping_type::value_type(key, tools::call_on_element(root_node.rhs, *this)));
|
||||
if (leaf_t == LHS_NODE_TYPE && root_node.lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
mapping_.insert(mapping_type::value_type(key, create(root_node.lhs)));
|
||||
else if (leaf_t == RHS_NODE_TYPE && root_node.rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
mapping_.insert(mapping_type::value_type(key, create(root_node.rhs)));
|
||||
else if ( leaf_t== PARENT_NODE_TYPE)
|
||||
{
|
||||
if (root_node.op.type==viennacl::scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE)
|
||||
if (root_node.op.type==scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE)
|
||||
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_vector_diag>(&statement, root_idx, &mapping_)));
|
||||
else if (root_node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_DIAG_TYPE)
|
||||
else if (root_node.op.type==scheduler::OPERATION_BINARY_MATRIX_DIAG_TYPE)
|
||||
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_diag>(&statement, root_idx, &mapping_)));
|
||||
else if (root_node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_ROW_TYPE)
|
||||
else if (root_node.op.type==scheduler::OPERATION_BINARY_MATRIX_ROW_TYPE)
|
||||
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_row>(&statement, root_idx, &mapping_)));
|
||||
else if (root_node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_COLUMN_TYPE)
|
||||
else if (root_node.op.type==scheduler::OPERATION_BINARY_MATRIX_COLUMN_TYPE)
|
||||
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_column>(&statement, root_idx, &mapping_)));
|
||||
else if (is_scalar_reduction(root_node))
|
||||
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_scalar_reduction>(&statement, root_idx, &mapping_)));
|
||||
else if (is_vector_reduction(root_node))
|
||||
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_row_wise_reduction>(&statement, root_idx, &mapping_)));
|
||||
else if (root_node.op.type == viennacl::scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE)
|
||||
else if (root_node.op.type == scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE)
|
||||
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_product>(&statement, root_idx, &mapping_)));
|
||||
else if (root_node.op.type == viennacl::scheduler::OPERATION_UNARY_TRANS_TYPE)
|
||||
else if (root_node.op.type == scheduler::OPERATION_UNARY_TRANS_TYPE)
|
||||
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_trans>(&statement, root_idx, &mapping_)));
|
||||
}
|
||||
}
|
||||
@@ -148,86 +143,91 @@ private:
|
||||
public:
|
||||
typedef void result_type;
|
||||
|
||||
set_arguments_functor(symbolic_binder & binder, unsigned int & current_arg, viennacl::ocl::kernel & kernel) : binder_(binder), current_arg_(current_arg), kernel_(kernel){ }
|
||||
set_arguments_functor(symbolic_binder & binder, unsigned int & current_arg, cl::Kernel & kernel) : binder_(binder), current_arg_(current_arg), kernel_(kernel){ }
|
||||
|
||||
template<class NumericT>
|
||||
result_type operator()(NumericT const & scal) const
|
||||
{
|
||||
typedef typename viennacl::result_of::cl_type<NumericT>::type cl_scalartype;
|
||||
kernel_.arg(current_arg_++, cl_scalartype(scal));
|
||||
}
|
||||
// template<class NumericT>
|
||||
// void operator()(NumericT const & scal) const
|
||||
// {
|
||||
// typedef typename viennacl::result_of::cl_type<NumericT>::type cl_scalartype;
|
||||
// kernel_.arg(current_arg_++, cl_scalartype(scal));
|
||||
// }
|
||||
|
||||
/** @brief Scalar mapping */
|
||||
template<class NumericT>
|
||||
result_type operator()(viennacl::scalar<NumericT> const & scal) const
|
||||
{
|
||||
if (binder_.bind(&viennacl::traits::handle(scal)))
|
||||
kernel_.arg(current_arg_++, scal.handle().opencl_handle());
|
||||
}
|
||||
// /** @brief Scalar mapping */
|
||||
// template<class NumericT>
|
||||
// void operator()(viennacl::scalar<NumericT> const & scal) const
|
||||
// {
|
||||
// if (binder_.bind(&viennacl::traits::handle(scal)))
|
||||
// kernel_.arg(current_arg_++, scal.handle().opencl_handle());
|
||||
// }
|
||||
|
||||
/** @brief Vector mapping */
|
||||
template<class NumericT>
|
||||
result_type operator()(viennacl::vector_base<NumericT> const & vec) const
|
||||
void set_vector_arguments(vector_base const & v) const
|
||||
{
|
||||
if (binder_.bind(&viennacl::traits::handle(vec)))
|
||||
if (binder_.bind(&v.data()))
|
||||
{
|
||||
kernel_.arg(current_arg_++, vec.handle().opencl_handle());
|
||||
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start(vec)));
|
||||
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride(vec)));
|
||||
kernel_.setArg(current_arg_++, v.data());
|
||||
kernel_.setArg(current_arg_++, cl_uint(v.start()));
|
||||
kernel_.setArg(current_arg_++, cl_uint(v.stride()));
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief Implicit vector mapping */
|
||||
template<class NumericT>
|
||||
result_type operator()(viennacl::implicit_vector_base<NumericT> const & vec) const
|
||||
{
|
||||
typedef typename viennacl::result_of::cl_type<NumericT>::type cl_scalartype;
|
||||
kernel_.arg(current_arg_++, cl_scalartype(vec.value()));
|
||||
if (vec.has_index())
|
||||
kernel_.arg(current_arg_++, cl_uint(vec.index()));
|
||||
}
|
||||
// /** @brief Implicit vector mapping */
|
||||
// template<class NumericT>
|
||||
// void operator()(viennacl::implicit_vector_base<NumericT> const & vec) const
|
||||
// {
|
||||
// typedef typename viennacl::result_of::cl_type<NumericT>::type cl_scalartype;
|
||||
// kernel_.arg(current_arg_++, cl_scalartype(vec.value()));
|
||||
// if (vec.has_index())
|
||||
// kernel_.arg(current_arg_++, cl_uint(vec.index()));
|
||||
// }
|
||||
|
||||
/** @brief Matrix mapping */
|
||||
template<class NumericT>
|
||||
result_type operator()(viennacl::matrix_base<NumericT> const & mat) const
|
||||
{
|
||||
if (binder_.bind(&viennacl::traits::handle(mat)))
|
||||
{
|
||||
kernel_.arg(current_arg_++, mat.handle().opencl_handle());
|
||||
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::ld(mat)));
|
||||
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start1(mat)));
|
||||
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start2(mat)));
|
||||
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride1(mat)));
|
||||
kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride2(mat)));
|
||||
}
|
||||
}
|
||||
// /** @brief Matrix mapping */
|
||||
// template<class NumericT>
|
||||
// void operator()(viennacl::matrix_base<NumericT> const & mat) const
|
||||
// {
|
||||
// if (binder_.bind(&viennacl::traits::handle(mat)))
|
||||
// {
|
||||
// kernel_.arg(current_arg_++, mat.handle().opencl_handle());
|
||||
// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::ld(mat)));
|
||||
// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start1(mat)));
|
||||
// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start2(mat)));
|
||||
// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride1(mat)));
|
||||
// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride2(mat)));
|
||||
// }
|
||||
// }
|
||||
|
||||
/** @brief Implicit matrix mapping */
|
||||
template<class NumericT>
|
||||
result_type operator()(viennacl::implicit_matrix_base<NumericT> const & mat) const
|
||||
// /** @brief Implicit matrix mapping */
|
||||
// template<class NumericT>
|
||||
// void operator()(viennacl::implicit_matrix_base<NumericT> const & mat) const
|
||||
// {
|
||||
// kernel_.arg(current_arg_++, typename viennacl::result_of::cl_type<NumericT>::type(mat.value()));
|
||||
// }
|
||||
|
||||
void set_arguments(scheduler::lhs_rhs_element const & lhs_rhs) const
|
||||
{
|
||||
kernel_.arg(current_arg_++, typename viennacl::result_of::cl_type<NumericT>::type(mat.value()));
|
||||
// if(lhs_rhs.subtype==scheduler::DENSE_VECTOR_TYPE)
|
||||
set_vector_arguments(*lhs_rhs.vector);
|
||||
}
|
||||
|
||||
/** @brief Traversal functor: */
|
||||
void operator()(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const
|
||||
void operator()(scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const
|
||||
{
|
||||
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
tools::call_on_element(root_node.lhs, *this);
|
||||
else if (leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
tools::call_on_element(root_node.rhs, *this);
|
||||
scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
set_arguments(root_node.lhs);
|
||||
else if (leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
set_arguments(root_node.rhs);
|
||||
}
|
||||
|
||||
private:
|
||||
symbolic_binder & binder_;
|
||||
unsigned int & current_arg_;
|
||||
viennacl::ocl::kernel & kernel_;
|
||||
cl::Kernel & kernel_;
|
||||
};
|
||||
|
||||
protected:
|
||||
|
||||
static inline void compute_reduction(tools::kernel_generation_stream & os, std::string acc, std::string cur, viennacl::scheduler::op_element const & op)
|
||||
static inline void compute_reduction(tools::kernel_generation_stream & os, std::string acc, std::string cur, scheduler::op_element const & op)
|
||||
{
|
||||
if (tools::elementwise_function(op))
|
||||
os << acc << "=" << tools::evaluate(op.type) << "(" << acc << "," << cur << ");" << std::endl;
|
||||
@@ -235,15 +235,15 @@ protected:
|
||||
os << acc << "= (" << acc << ")" << tools::evaluate(op.type) << "(" << cur << ");" << std::endl;
|
||||
}
|
||||
|
||||
static inline void compute_index_reduction(tools::kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, viennacl::scheduler::op_element const & op)
|
||||
static inline void compute_index_reduction(tools::kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, scheduler::op_element const & op)
|
||||
{
|
||||
// os << acc << " = " << cur_value << ">" << acc_value << "?" << cur << ":" << acc << ";" << std::endl;
|
||||
os << acc << "= select(" << acc << "," << cur << "," << cur_value << ">" << acc_value << ");" << std::endl;
|
||||
os << acc_value << "=";
|
||||
if (op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE) os << "fmax";
|
||||
if (op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE) os << "max";
|
||||
if (op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE) os << "fmin";
|
||||
if (op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE) os << "min";
|
||||
if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE) os << "fmax";
|
||||
if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE) os << "max";
|
||||
if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE) os << "fmin";
|
||||
if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE) os << "min";
|
||||
os << "(" << acc_value << "," << cur_value << ");"<< std::endl;
|
||||
}
|
||||
|
||||
@@ -269,27 +269,27 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
static inline std::string neutral_element(viennacl::scheduler::op_element const & op)
|
||||
static inline std::string neutral_element(scheduler::op_element const & op)
|
||||
{
|
||||
switch (op.type)
|
||||
{
|
||||
case viennacl::scheduler::OPERATION_BINARY_ADD_TYPE : return "0";
|
||||
case viennacl::scheduler::OPERATION_BINARY_MULT_TYPE : return "1";
|
||||
case viennacl::scheduler::OPERATION_BINARY_DIV_TYPE : return "1";
|
||||
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_FMAX_TYPE : return "-INFINITY";
|
||||
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE : return "-INFINITY";
|
||||
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_MAX_TYPE : return "-INFINITY";
|
||||
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE : return "-INFINITY";
|
||||
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_FMIN_TYPE : return "INFINITY";
|
||||
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE : return "INFINITY";
|
||||
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_MIN_TYPE : return "INFINITY";
|
||||
case viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE : return "INFINITY";
|
||||
case scheduler::OPERATION_BINARY_ADD_TYPE : return "0";
|
||||
case scheduler::OPERATION_BINARY_MULT_TYPE : return "1";
|
||||
case scheduler::OPERATION_BINARY_DIV_TYPE : return "1";
|
||||
case scheduler::OPERATION_BINARY_ELEMENT_FMAX_TYPE : return "-INFINITY";
|
||||
case scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE : return "-INFINITY";
|
||||
case scheduler::OPERATION_BINARY_ELEMENT_MAX_TYPE : return "-INFINITY";
|
||||
case scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE : return "-INFINITY";
|
||||
case scheduler::OPERATION_BINARY_ELEMENT_FMIN_TYPE : return "INFINITY";
|
||||
case scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE : return "INFINITY";
|
||||
case scheduler::OPERATION_BINARY_ELEMENT_MIN_TYPE : return "INFINITY";
|
||||
case scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE : return "INFINITY";
|
||||
|
||||
default: throw generator_not_supported_exception("Unsupported reduction operator : no neutral element known");
|
||||
}
|
||||
}
|
||||
|
||||
static std::string generate_arguments(std::vector<mapping_type> const & mappings, std::multimap<std::string, std::string> const & accessors, statements_container const & statements)
|
||||
static std::string generate_arguments(std::vector<mapping_type> const & mappings, std::multimap<std::string, std::string> const & accessors, scheduler::statements_container const & statements)
|
||||
{
|
||||
tools::kernel_generation_stream stream;
|
||||
tools::process(stream, PARENT_NODE_TYPE, accessors, statements, mappings);
|
||||
@@ -308,7 +308,7 @@ protected:
|
||||
return "__global " + data_type + "* #pointer, uint #start, uint #stride,";
|
||||
}
|
||||
|
||||
static std::string generate_arguments(std::string const & data_type, std::vector<mapping_type> const & mappings, statements_container const & statements)
|
||||
static std::string generate_arguments(std::string const & data_type, std::vector<mapping_type> const & mappings, scheduler::statements_container const & statements)
|
||||
{
|
||||
return generate_arguments(mappings, tools::create_process_accessors("scalar", "__global #scalartype* #pointer,")
|
||||
("host_scalar", "#scalartype #name,")
|
||||
@@ -320,11 +320,11 @@ protected:
|
||||
|
||||
|
||||
|
||||
void set_arguments(statements_container const & statements, viennacl::ocl::kernel & kernel, unsigned int & current_arg)
|
||||
void set_arguments(scheduler::statements_container const & statements, cl::Kernel & kernel, unsigned int & current_arg)
|
||||
{
|
||||
tools::shared_ptr<symbolic_binder> binder = make_binder(binding_policy_);
|
||||
for (statements_container::data_type::const_iterator itt = statements.data().begin(); itt != statements.data().end(); ++itt)
|
||||
tools::traverse(*itt, itt->root(), set_arguments_functor(*binder,current_arg,kernel), true);
|
||||
for (scheduler::statements_container::data_type::const_iterator itt = statements.data().begin(); itt != statements.data().end(); ++itt)
|
||||
tools::traverse(*itt, itt->root(), set_arguments_functor(*binder, current_arg, kernel), true);
|
||||
}
|
||||
|
||||
class invalid_template_exception : public std::exception
|
||||
@@ -364,18 +364,18 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
static bool is_node_trans(viennacl::scheduler::statement::container_type const & array, size_t root_idx, leaf_t leaf_type)
|
||||
static bool is_node_trans(scheduler::statement::container_type const & array, size_t root_idx, leaf_t leaf_type)
|
||||
{
|
||||
bool res = false;
|
||||
viennacl::scheduler::lhs_rhs_element viennacl::scheduler::statement_node::*ptr;
|
||||
scheduler::lhs_rhs_element scheduler::statement_node::*ptr;
|
||||
if (leaf_type==LHS_NODE_TYPE)
|
||||
ptr = &viennacl::scheduler::statement_node::lhs;
|
||||
ptr = &scheduler::statement_node::lhs;
|
||||
else
|
||||
ptr = &viennacl::scheduler::statement_node::rhs;
|
||||
viennacl::scheduler::statement_node const * node = &array[root_idx];
|
||||
while ((node->*ptr).type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
ptr = &scheduler::statement_node::rhs;
|
||||
scheduler::statement_node const * node = &array[root_idx];
|
||||
while ((node->*ptr).type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
{
|
||||
if (array[(node->*ptr).node_index].op.type==viennacl::scheduler::OPERATION_UNARY_TRANS_TYPE)
|
||||
if (array[(node->*ptr).node_index].op.type==scheduler::OPERATION_UNARY_TRANS_TYPE)
|
||||
res = !res;
|
||||
node = &array[(node->*ptr).node_index];
|
||||
}
|
||||
@@ -392,28 +392,28 @@ protected:
|
||||
return str + tools::to_string(suffixes[i]);
|
||||
}
|
||||
|
||||
static bool is_offset_modifier(viennacl::scheduler::statement_node const & node)
|
||||
static bool is_offset_modifier(scheduler::statement_node const & node)
|
||||
{
|
||||
return node.op.type==viennacl::scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE
|
||||
|| node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_DIAG_TYPE
|
||||
|| node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_ROW_TYPE
|
||||
|| node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_COLUMN_TYPE;
|
||||
return node.op.type==scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE
|
||||
|| node.op.type==scheduler::OPERATION_BINARY_MATRIX_DIAG_TYPE
|
||||
|| node.op.type==scheduler::OPERATION_BINARY_MATRIX_ROW_TYPE
|
||||
|| node.op.type==scheduler::OPERATION_BINARY_MATRIX_COLUMN_TYPE;
|
||||
}
|
||||
|
||||
static bool has_strided_access(statements_container const & statements)
|
||||
static bool has_strided_access(scheduler::statements_container const & statements)
|
||||
{
|
||||
for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
|
||||
for (scheduler::statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
|
||||
{
|
||||
//checks for vectors
|
||||
std::vector<viennacl::scheduler::lhs_rhs_element> vectors = tools::filter_elements(viennacl::scheduler::DENSE_VECTOR_TYPE, *it);
|
||||
for (std::vector<viennacl::scheduler::lhs_rhs_element>::iterator itt = vectors.begin(); itt != vectors.end(); ++itt)
|
||||
if (tools::call_on_vector(*itt, tools::stride_fun())>1)
|
||||
std::vector<scheduler::lhs_rhs_element> vectors = tools::filter_elements(scheduler::DENSE_VECTOR_TYPE, *it);
|
||||
for (std::vector<scheduler::lhs_rhs_element>::iterator itt = vectors.begin(); itt != vectors.end(); ++itt)
|
||||
if(itt->vector->stride())
|
||||
return true;
|
||||
|
||||
//checks for matrix
|
||||
std::vector<viennacl::scheduler::lhs_rhs_element> matrices = tools::filter_elements(viennacl::scheduler::DENSE_MATRIX_TYPE, *it);
|
||||
for (std::vector<viennacl::scheduler::lhs_rhs_element>::iterator itt = matrices.begin(); itt != matrices.end(); ++itt)
|
||||
if (tools::call_on_matrix(*itt, tools::stride1_fun())>1 || tools::call_on_matrix(*itt, tools::stride2_fun())>2)
|
||||
std::vector<scheduler::lhs_rhs_element> matrices = tools::filter_elements(scheduler::DENSE_MATRIX_TYPE, *it);
|
||||
for (std::vector<scheduler::lhs_rhs_element>::iterator itt = matrices.begin(); itt != matrices.end(); ++itt)
|
||||
if (itt->matrix->stride1() > 1 || itt->matrix->stride2() > 1)
|
||||
return true;
|
||||
|
||||
if(tools::filter_nodes(&is_offset_modifier, *it, true).empty()==false)
|
||||
@@ -422,42 +422,42 @@ protected:
|
||||
return false;
|
||||
}
|
||||
|
||||
static atidlas_int_t vector_size(viennacl::scheduler::statement_node const & node, bool up_to_internal_size)
|
||||
static atidlas_int_t vector_size(scheduler::statement_node const & node, bool up_to_internal_size)
|
||||
{
|
||||
using namespace viennacl::scheduler;
|
||||
using namespace scheduler;
|
||||
using namespace tools;
|
||||
|
||||
atidlas_int_t (vector_base::*funsize)(void) const = up_to_internal_size?&vector_base::internal_size:&vector_base::size;
|
||||
atidlas_int_t (matrix_base::*funsize1)(void) const = up_to_internal_size?&matrix_base::internal_size1:&matrix_base::size1;
|
||||
atidlas_int_t (matrix_base::*funsize2)(void) const = up_to_internal_size?&matrix_base::internal_size2:&matrix_base::size2;
|
||||
|
||||
if (node.op.type==OPERATION_BINARY_MATRIX_DIAG_TYPE)
|
||||
{
|
||||
atidlas_int_t size1 = up_to_internal_size?call_on_matrix(node.lhs, internal_size1_fun()):call_on_matrix(node.lhs, size1_fun());
|
||||
atidlas_int_t size2 = up_to_internal_size?call_on_matrix(node.lhs, internal_size2_fun()):call_on_matrix(node.lhs, size2_fun());
|
||||
return std::min<atidlas_int_t>(size1, size2);
|
||||
}
|
||||
return std::min<atidlas_int_t>((node.lhs.matrix->*funsize1)(), (node.lhs.matrix->*funsize2)());
|
||||
else if (node.op.type==OPERATION_BINARY_MATRIX_ROW_TYPE)
|
||||
return up_to_internal_size?call_on_matrix(node.lhs, internal_size2_fun()):call_on_matrix(node.lhs, size2_fun());
|
||||
return (node.lhs.matrix->*funsize2)();
|
||||
else if (node.op.type==OPERATION_BINARY_MATRIX_COLUMN_TYPE)
|
||||
return up_to_internal_size?call_on_matrix(node.lhs, internal_size1_fun()):call_on_matrix(node.lhs, size1_fun());
|
||||
return (node.lhs.matrix->*funsize1)();
|
||||
else
|
||||
return up_to_internal_size?call_on_vector(node.lhs, internal_size_fun()):call_on_vector(node.lhs, size_fun());
|
||||
return (node.lhs.vector->*funsize)();
|
||||
|
||||
}
|
||||
|
||||
static std::pair<atidlas_int_t, atidlas_int_t> matrix_size(viennacl::scheduler::statement_node const & node, bool up_to_internal_size)
|
||||
static std::pair<atidlas_int_t, atidlas_int_t> matrix_size(scheduler::statement_node const & node, bool up_to_internal_size)
|
||||
{
|
||||
using namespace tools;
|
||||
if (node.op.type==viennacl::scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE)
|
||||
atidlas_int_t (vector_base::*funsize)() const = up_to_internal_size?&vector_base::internal_size:&vector_base::size;
|
||||
atidlas_int_t (matrix_base::*funsize1)() const = up_to_internal_size?&matrix_base::internal_size1:&matrix_base::size1;
|
||||
atidlas_int_t (matrix_base::*funsize2)() const = up_to_internal_size?&matrix_base::internal_size2:&matrix_base::size2;
|
||||
|
||||
if (node.op.type==scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE)
|
||||
{
|
||||
atidlas_int_t is = call_on_vector(node.lhs, internal_size_fun());
|
||||
atidlas_int_t s = call_on_vector(node.lhs, size_fun());
|
||||
return up_to_internal_size?std::make_pair(is,is):std::make_pair(s,s);
|
||||
atidlas_int_t size = (node.lhs.vector->*funsize)();
|
||||
return std::make_pair(size,size);
|
||||
}
|
||||
else
|
||||
{
|
||||
atidlas_int_t size1 = up_to_internal_size?call_on_matrix(node.lhs, internal_size1_fun()):call_on_matrix(node.lhs, size1_fun());
|
||||
atidlas_int_t size2 = up_to_internal_size?call_on_matrix(node.lhs, internal_size2_fun()):call_on_matrix(node.lhs, size2_fun());
|
||||
return std::make_pair(size1, size2);
|
||||
}
|
||||
return std::make_pair((node.lhs.matrix->*funsize1)(), (node.lhs.matrix->*funsize2)());
|
||||
}
|
||||
|
||||
//NB : templates are not used here because declaring a functor out of the generate() functions would be harder to read
|
||||
struct loop_body_base
|
||||
{
|
||||
virtual void operator()(tools::kernel_generation_stream & stream, unsigned int simd_width) const = 0;
|
||||
@@ -507,22 +507,22 @@ protected:
|
||||
|
||||
private:
|
||||
/** @brief Generates the body of the associated kernel function */
|
||||
virtual std::vector<std::string> generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector<mapping_type> const & mapping) const = 0;
|
||||
virtual std::vector<std::string> generate_impl(std::string const & kernel_prefix, scheduler::statements_container const & statements, std::vector<mapping_type> const & mapping) const = 0;
|
||||
|
||||
public:
|
||||
template_base(binding_policy_t binding_policy) : binding_policy_(binding_policy) {}
|
||||
|
||||
virtual unsigned int lmem_usage(statements_container const &) const { return 0; }
|
||||
virtual unsigned int lmem_usage(scheduler::statements_container const &) const { return 0; }
|
||||
|
||||
virtual unsigned int registers_usage(statements_container const &) const { return 0; }
|
||||
virtual unsigned int registers_usage(scheduler::statements_container const &) const { return 0; }
|
||||
|
||||
virtual std::vector<atidlas_int_t> input_sizes(statements_container const & statements) = 0;
|
||||
virtual std::vector<atidlas_int_t> input_sizes(scheduler::statements_container const & statements) = 0;
|
||||
|
||||
virtual ~template_base(){ }
|
||||
|
||||
std::vector<std::string> generate(std::string const & kernel_prefix, statements_container const & statements, viennacl::ocl::device const & device)
|
||||
std::vector<std::string> generate(std::string const & kernel_prefix, scheduler::statements_container const & statements, cl::Device const & device)
|
||||
{
|
||||
statements_container::data_type::const_iterator sit;
|
||||
scheduler::statements_container::data_type::const_iterator sit;
|
||||
std::vector<mapping_type>::iterator mit;
|
||||
|
||||
if(int err = check_invalid(statements, device))
|
||||
@@ -538,9 +538,9 @@ public:
|
||||
}
|
||||
|
||||
/** @brief returns whether or not the profile has undefined behavior on particular device */
|
||||
virtual int check_invalid(statements_container const & statements, viennacl::ocl::device const & device) const = 0;
|
||||
virtual int check_invalid(scheduler::statements_container const & statements, cl::Device const & device) const = 0;
|
||||
|
||||
virtual void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, statements_container const & statements) = 0;
|
||||
virtual void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, scheduler::statements_container const & statements) = 0;
|
||||
|
||||
virtual tools::shared_ptr<template_base> clone() const = 0;
|
||||
|
||||
@@ -553,23 +553,23 @@ template<class TemplateType, class ParametersType>
|
||||
class template_base_impl : public template_base
|
||||
{
|
||||
private:
|
||||
virtual int check_invalid_impl(viennacl::ocl::device const &, statements_container const &) const { return TEMPLATE_VALID; }
|
||||
virtual int check_invalid_impl(cl::Device const &, scheduler::statements_container const &) const { return TEMPLATE_VALID; }
|
||||
|
||||
protected:
|
||||
bool has_misaligned_offset(statements_container const & statements)
|
||||
bool has_misaligned_offset(scheduler::statements_container const & statements)
|
||||
{
|
||||
for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
|
||||
for (scheduler::statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
|
||||
{
|
||||
//checks for vectors
|
||||
std::vector<viennacl::scheduler::lhs_rhs_element> vectors = tools::filter_elements(viennacl::scheduler::DENSE_VECTOR_TYPE, *it);
|
||||
for (std::vector<viennacl::scheduler::lhs_rhs_element>::iterator itt = vectors.begin(); itt != vectors.end(); ++itt)
|
||||
if (tools::call_on_vector(*itt, tools::stride_fun())>1)
|
||||
std::vector<scheduler::lhs_rhs_element> vectors = tools::filter_elements(scheduler::DENSE_VECTOR_TYPE, *it);
|
||||
for (std::vector<scheduler::lhs_rhs_element>::iterator itt = vectors.begin(); itt != vectors.end(); ++itt)
|
||||
if (itt->vector->stride()>1)
|
||||
return true;
|
||||
|
||||
//checks for matrix
|
||||
std::vector<viennacl::scheduler::lhs_rhs_element> matrices = tools::filter_elements(viennacl::scheduler::DENSE_MATRIX_TYPE, *it);
|
||||
for (std::vector<viennacl::scheduler::lhs_rhs_element>::iterator itt = matrices.begin(); itt != matrices.end(); ++itt)
|
||||
if (tools::call_on_matrix(*itt, tools::stride1_fun())>1 || tools::call_on_matrix(*itt, tools::stride2_fun())>2)
|
||||
std::vector<scheduler::lhs_rhs_element> matrices = tools::filter_elements(scheduler::DENSE_MATRIX_TYPE, *it);
|
||||
for (std::vector<scheduler::lhs_rhs_element>::iterator itt = matrices.begin(); itt != matrices.end(); ++itt)
|
||||
if (itt->matrix->stride1()>1 || itt->matrix->stride2()>1)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@@ -588,19 +588,17 @@ public:
|
||||
{ return tools::shared_ptr<template_base>(new TemplateType(*dynamic_cast<TemplateType const *>(this))); }
|
||||
|
||||
/** @brief returns whether or not the profile has undefined behavior on particular device */
|
||||
int check_invalid(statements_container const & statements, viennacl::ocl::device const & device) const
|
||||
int check_invalid(scheduler::statements_container const & statements, cl::Device const & device) const
|
||||
{
|
||||
using namespace viennacl::tools;
|
||||
|
||||
//Query device informations
|
||||
size_t lmem_available = static_cast<size_t>(device.local_mem_size());
|
||||
size_t lmem_available = device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
|
||||
size_t lmem_used = lmem_usage(statements);
|
||||
if (lmem_used>lmem_available)
|
||||
return TEMPLATE_LOCAL_MEMORY_OVERFLOW;
|
||||
|
||||
//Invalid work group size
|
||||
size_t max_workgroup_size = device.max_work_group_size();
|
||||
std::vector<size_t> max_work_item_sizes = device.max_work_item_sizes();
|
||||
size_t max_workgroup_size = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
|
||||
std::vector<size_t> max_work_item_sizes = device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
|
||||
if (p_.local_size_0*p_.local_size_1 > max_workgroup_size)
|
||||
return TEMPLATE_WORK_GROUP_SIZE_OVERFLOW;
|
||||
if (p_.local_size_0 > max_work_item_sizes[0])
|
||||
@@ -611,12 +609,12 @@ public:
|
||||
|
||||
//Advice from the Intel guide
|
||||
unsigned int warp_size = 8;
|
||||
if (device.type()==CL_DEVICE_TYPE_GPU)
|
||||
if (device.getInfo<CL_DEVICE_TYPE>()==CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
//Advice from the nvidia guide
|
||||
warp_size = 32;
|
||||
//Advice from the AMD guide
|
||||
if (device.vendor_id()==4098)
|
||||
if (device.getInfo<CL_DEVICE_VENDOR_ID>()==4098)
|
||||
warp_size = 64;
|
||||
}
|
||||
if (((p_.local_size_0*p_.local_size_1)%warp_size)>0)
|
||||
|
@@ -4,9 +4,7 @@
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
|
||||
#include "viennacl/scheduler/forwards.h"
|
||||
#include "viennacl/tools/tools.hpp"
|
||||
|
||||
#include "atidlas/scheduler/forwards.h"
|
||||
#include "atidlas/backend/templates/template_base.hpp"
|
||||
|
||||
namespace atidlas
|
||||
@@ -25,14 +23,14 @@ public:
|
||||
class vector_axpy_template : public template_base_impl<vector_axpy_template, vector_axpy_parameters>
|
||||
{
|
||||
private:
|
||||
virtual int check_invalid_impl(viennacl::ocl::device const &, statements_container const &) const
|
||||
virtual int check_invalid_impl(cl::Device const &, scheduler::statements_container const &) const
|
||||
{
|
||||
if (p_.fetching_policy==FETCH_FROM_LOCAL)
|
||||
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
|
||||
return TEMPLATE_VALID;
|
||||
}
|
||||
|
||||
std::vector<std::string> generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector<mapping_type> const & mappings) const
|
||||
std::vector<std::string> generate_impl(std::string const & kernel_prefix, scheduler::statements_container const & statements, std::vector<mapping_type> const & mappings) const
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
for (unsigned int i = 0; i < 2; ++i)
|
||||
@@ -95,28 +93,28 @@ public:
|
||||
void up_to_internal_size(bool v)
|
||||
{ up_to_internal_size_ = v; }
|
||||
|
||||
std::vector<atidlas_int_t> input_sizes(statements_container const & statements)
|
||||
std::vector<atidlas_int_t> input_sizes(scheduler::statements_container const & statements)
|
||||
{
|
||||
viennacl::scheduler::statement const & statement = statements.data().front();
|
||||
scheduler::statement const & statement = statements.data().front();
|
||||
atidlas_int_t size = vector_size(lhs_most(statement.array(), statement.root()), up_to_internal_size_);
|
||||
return tools::make_vector<atidlas_int_t>() << size;
|
||||
}
|
||||
|
||||
void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, statements_container const & statements)
|
||||
void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, scheduler::statements_container const & statements)
|
||||
{
|
||||
atidlas_int_t size = input_sizes(statements)[0];
|
||||
std::string kfallback = kernel_prefix;
|
||||
kfallback+='0';
|
||||
std::string kopt = kernel_prefix;
|
||||
kopt+='1';
|
||||
bool fallback = p_.simd_width > 1 && (has_strided_access(statements) || (size%p_.simd_width>0) || has_misaligned_offset(statements));
|
||||
viennacl::ocl::kernel * kernel = &programs[fallback?0:1].program().get_kernel(fallback?kfallback:kopt);
|
||||
kernel->local_work_size(0, p_.local_size_0);
|
||||
kernel->global_work_size(0, p_.local_size_0*p_.num_groups);
|
||||
unsigned int current_arg = 0;
|
||||
kernel->arg(current_arg++, static_cast<cl_uint>(size));
|
||||
set_arguments(statements, *kernel, current_arg);
|
||||
viennacl::ocl::enqueue(*kernel);
|
||||
// atidlas_int_t size = input_sizes(statements)[0];
|
||||
// std::string kfallback = kernel_prefix;
|
||||
// kfallback+='0';
|
||||
// std::string kopt = kernel_prefix;
|
||||
// kopt+='1';
|
||||
// bool fallback = p_.simd_width > 1 && (has_strided_access(statements) || (size%p_.simd_width>0) || has_misaligned_offset(statements));
|
||||
// cl::Kernel * kernel = &programs[fallback?0:1].program().get_kernel(fallback?kfallback:kopt);
|
||||
// kernel->local_work_size(0, p_.local_size_0);
|
||||
// kernel->global_work_size(0, p_.local_size_0*p_.num_groups);
|
||||
// unsigned int current_arg = 0;
|
||||
// kernel->arg(current_arg++, static_cast<cl_uint>(size));
|
||||
// set_arguments(statements, *kernel, current_arg);
|
||||
// cl::CommandQueue().enqueueNDRangeKernel(kernel);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@@ -4,377 +4,56 @@
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include "viennacl/matrix_def.hpp"
|
||||
#include "viennacl/vector_def.hpp"
|
||||
|
||||
#include "viennacl/ocl/forwards.h"
|
||||
|
||||
#include "viennacl/scheduler/forwards.h"
|
||||
|
||||
#include "viennacl/traits/size.hpp"
|
||||
#include "viennacl/traits/handle.hpp"
|
||||
|
||||
#include "atidlas/tools/to_string.hpp"
|
||||
#include "atidlas/forwards.h"
|
||||
#include "atidlas/backend/forwards.h"
|
||||
#include "atidlas/scheduler/forwards.h"
|
||||
#include "atidlas/tools/find_and_replace.hpp"
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
namespace tools
|
||||
{
|
||||
|
||||
template<class T>
|
||||
T median(std::vector<T> x)
|
||||
inline std::string numeric_type_to_string(numeric_type const & type)
|
||||
{
|
||||
size_t size = x.size();
|
||||
std::sort(x.begin(), x.end());
|
||||
if (size % 2 == 0)
|
||||
return (x[size / 2 - 1] + x[size / 2]) / 2;
|
||||
else
|
||||
return x[size / 2];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class make_vector {
|
||||
public:
|
||||
typedef make_vector<T> my_type;
|
||||
my_type& operator<< (const T& val) {
|
||||
data_.push_back(val);
|
||||
return *this;
|
||||
}
|
||||
operator std::vector<T>() const {
|
||||
return data_;
|
||||
}
|
||||
private:
|
||||
std::vector<T> data_;
|
||||
};
|
||||
|
||||
//CUDA Conversion
|
||||
inline std::string opencl_source_to_cuda_source(std::string const & opencl_src)
|
||||
{
|
||||
std::string res = opencl_src;
|
||||
|
||||
viennacl::tools::find_and_replace(res,"__attribute__","//__attribute__");
|
||||
|
||||
//Pointer
|
||||
viennacl::tools::find_and_replace(res, "__global float*", "float*");
|
||||
viennacl::tools::find_and_replace(res, "__local float*", "float*");
|
||||
|
||||
viennacl::tools::find_and_replace(res, "__global double*", "double*");
|
||||
viennacl::tools::find_and_replace(res, "__local double*", "double*");
|
||||
|
||||
//Qualifiers
|
||||
viennacl::tools::find_and_replace(res,"__global","__device__");
|
||||
viennacl::tools::find_and_replace(res,"__kernel","__global__");
|
||||
viennacl::tools::find_and_replace(res,"__constant","__constant__");
|
||||
viennacl::tools::find_and_replace(res,"__local","__shared__");
|
||||
|
||||
//Indexing
|
||||
viennacl::tools::find_and_replace(res,"get_num_groups(0)","gridDim.x");
|
||||
viennacl::tools::find_and_replace(res,"get_num_groups(1)","gridDim.y");
|
||||
|
||||
viennacl::tools::find_and_replace(res,"get_local_size(0)","blockDim.x");
|
||||
viennacl::tools::find_and_replace(res,"get_local_size(1)","blockDim.y");
|
||||
|
||||
viennacl::tools::find_and_replace(res,"get_group_id(0)","blockIdx.x");
|
||||
viennacl::tools::find_and_replace(res,"get_group_id(1)","blockIdx.y");
|
||||
|
||||
viennacl::tools::find_and_replace(res,"get_local_id(0)","threadIdx.x");
|
||||
viennacl::tools::find_and_replace(res,"get_local_id(1)","threadIdx.y");
|
||||
|
||||
viennacl::tools::find_and_replace(res,"get_global_id(0)","(blockIdx.x*blockDim.x + threadIdx.x)");
|
||||
viennacl::tools::find_and_replace(res,"get_global_id(1)","(blockIdx.y*blockDim.y + threadIdx.y)");
|
||||
|
||||
//Synchronization
|
||||
viennacl::tools::find_and_replace(res,"barrier(CLK_LOCAL_MEM_FENCE)","__syncthreads()");
|
||||
viennacl::tools::find_and_replace(res,"barrier(CLK_GLOBAL_MEM_FENCE)","__syncthreads()");
|
||||
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static std::string numeric_type_to_string(viennacl::scheduler::statement_node_numeric_type const & type){
|
||||
switch (type)
|
||||
{
|
||||
//case viennacl::scheduler::CHAR_TYPE: return "char";
|
||||
//case viennacl::scheduler::UCHAR_TYPE: return "unsigned char";
|
||||
//case viennacl::scheduler::SHORT_TYPE: return "short";
|
||||
//case viennacl::scheduler::USHORT_TYPE: return "unsigned short";
|
||||
case viennacl::scheduler::INT_TYPE: return "int";
|
||||
case viennacl::scheduler::UINT_TYPE: return "unsigned int";
|
||||
case viennacl::scheduler::LONG_TYPE: return "long";
|
||||
case viennacl::scheduler::ULONG_TYPE: return "unsigned long";
|
||||
case viennacl::scheduler::FLOAT_TYPE : return "float";
|
||||
case viennacl::scheduler::DOUBLE_TYPE : return "double";
|
||||
//case CHAR_TYPE: return "char";
|
||||
//case UCHAR_TYPE: return "uchar";
|
||||
//case SHORT_TYPE: return "short";
|
||||
//case USHORT_TYPE: return "ushort";
|
||||
case INT_TYPE: return "int";
|
||||
case UINT_TYPE: return "uint";
|
||||
case LONG_TYPE: return "long";
|
||||
case ULONG_TYPE: return "ulong";
|
||||
case FLOAT_TYPE : return "float";
|
||||
case DOUBLE_TYPE : return "double";
|
||||
default : throw generator_not_supported_exception("Unsupported Scalartype");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class Fun>
|
||||
static typename Fun::result_type call_on_host_scalar(viennacl::scheduler::lhs_rhs_element element, Fun const & fun){
|
||||
assert(element.type_family == viennacl::scheduler::SCALAR_TYPE_FAMILY && bool("Must be called on a host scalar"));
|
||||
switch (element.numeric_type)
|
||||
{
|
||||
//case viennacl::scheduler::CHAR_TYPE: return fun(element.host_char);
|
||||
//case viennacl::scheduler::UCHAR_TYPE: return fun(element.host_uchar);
|
||||
//case viennacl::scheduler::SHORT_TYPE: return fun(element.host_short);
|
||||
//case viennacl::scheduler::USHORT_TYPE: return fun(element.host_ushort);
|
||||
case viennacl::scheduler::INT_TYPE: return fun(element.host_int);
|
||||
case viennacl::scheduler::UINT_TYPE: return fun(element.host_uint);
|
||||
case viennacl::scheduler::LONG_TYPE: return fun(element.host_long);
|
||||
case viennacl::scheduler::ULONG_TYPE: return fun(element.host_ulong);
|
||||
case viennacl::scheduler::FLOAT_TYPE : return fun(element.host_float);
|
||||
case viennacl::scheduler::DOUBLE_TYPE : return fun(element.host_double);
|
||||
default : throw generator_not_supported_exception("Unsupported Scalartype");
|
||||
}
|
||||
}
|
||||
|
||||
template<class Fun>
|
||||
static typename Fun::result_type call_on_scalar(viennacl::scheduler::lhs_rhs_element element, Fun const & fun){
|
||||
assert(element.type_family == viennacl::scheduler::SCALAR_TYPE_FAMILY && bool("Must be called on a scalar"));
|
||||
switch (element.numeric_type)
|
||||
{
|
||||
//case viennacl::scheduler::CHAR_TYPE: return fun(*element.scalar_char);
|
||||
//case viennacl::scheduler::UCHAR_TYPE: return fun(*element.scalar_uchar);
|
||||
//case viennacl::scheduler::SHORT_TYPE: return fun(*element.scalar_short);
|
||||
//case viennacl::scheduler::USHORT_TYPE: return fun(*element.scalar_ushort);
|
||||
case viennacl::scheduler::INT_TYPE: return fun(*element.scalar_int);
|
||||
case viennacl::scheduler::UINT_TYPE: return fun(*element.scalar_uint);
|
||||
case viennacl::scheduler::LONG_TYPE: return fun(*element.scalar_long);
|
||||
case viennacl::scheduler::ULONG_TYPE: return fun(*element.scalar_ulong);
|
||||
case viennacl::scheduler::FLOAT_TYPE : return fun(*element.scalar_float);
|
||||
case viennacl::scheduler::DOUBLE_TYPE : return fun(*element.scalar_double);
|
||||
default : throw generator_not_supported_exception("Unsupported Scalartype");
|
||||
}
|
||||
}
|
||||
|
||||
template<class Fun>
|
||||
static typename Fun::result_type call_on_vector(viennacl::scheduler::lhs_rhs_element element, Fun const & fun){
|
||||
assert(element.type_family == viennacl::scheduler::VECTOR_TYPE_FAMILY && bool("Must be called on a vector"));
|
||||
switch (element.numeric_type)
|
||||
{
|
||||
//case viennacl::scheduler::CHAR_TYPE: return fun(*element.vector_char);
|
||||
//case viennacl::scheduler::UCHAR_TYPE: return fun(*element.vector_uchar);
|
||||
//case viennacl::scheduler::SHORT_TYPE: return fun(*element.vector_short);
|
||||
//case viennacl::scheduler::USHORT_TYPE: return fun(*element.vector_ushort);
|
||||
case viennacl::scheduler::INT_TYPE: return fun(*element.vector_int);
|
||||
case viennacl::scheduler::UINT_TYPE: return fun(*element.vector_uint);
|
||||
case viennacl::scheduler::LONG_TYPE: return fun(*element.vector_long);
|
||||
case viennacl::scheduler::ULONG_TYPE: return fun(*element.vector_ulong);
|
||||
case viennacl::scheduler::FLOAT_TYPE : return fun(*element.vector_float);
|
||||
case viennacl::scheduler::DOUBLE_TYPE : return fun(*element.vector_double);
|
||||
default : throw generator_not_supported_exception("Unsupported Scalartype");
|
||||
}
|
||||
}
|
||||
|
||||
template<class Fun>
|
||||
static typename Fun::result_type call_on_implicit_vector(viennacl::scheduler::lhs_rhs_element element, Fun const & fun){
|
||||
assert(element.type_family == viennacl::scheduler::VECTOR_TYPE_FAMILY && bool("Must be called on a implicit_vector"));
|
||||
assert(element.subtype == viennacl::scheduler::IMPLICIT_VECTOR_TYPE && bool("Must be called on a implicit_vector"));
|
||||
switch (element.numeric_type)
|
||||
{
|
||||
//case viennacl::scheduler::CHAR_TYPE: return fun(*element.implicit_vector_char);
|
||||
//case viennacl::scheduler::UCHAR_TYPE: return fun(*element.implicit_vector_uchar);
|
||||
//case viennacl::scheduler::SHORT_TYPE: return fun(*element.implicit_vector_short);
|
||||
//case viennacl::scheduler::USHORT_TYPE: return fun(*element.implicit_vector_ushort);
|
||||
case viennacl::scheduler::INT_TYPE: return fun(*element.implicit_vector_int);
|
||||
case viennacl::scheduler::UINT_TYPE: return fun(*element.implicit_vector_uint);
|
||||
case viennacl::scheduler::LONG_TYPE: return fun(*element.implicit_vector_long);
|
||||
case viennacl::scheduler::ULONG_TYPE: return fun(*element.implicit_vector_ulong);
|
||||
case viennacl::scheduler::FLOAT_TYPE : return fun(*element.implicit_vector_float);
|
||||
case viennacl::scheduler::DOUBLE_TYPE : return fun(*element.implicit_vector_double);
|
||||
default : throw generator_not_supported_exception("Unsupported Scalartype");
|
||||
}
|
||||
}
|
||||
|
||||
template<class Fun>
|
||||
static typename Fun::result_type call_on_matrix(viennacl::scheduler::lhs_rhs_element element, Fun const & fun){
|
||||
assert(element.type_family == viennacl::scheduler::MATRIX_TYPE_FAMILY && bool("Must be called on a matrix"));
|
||||
switch (element.numeric_type)
|
||||
{
|
||||
//case viennacl::scheduler::CHAR_TYPE: return fun(*element.matrix_char);
|
||||
//case viennacl::scheduler::UCHAR_TYPE: return fun(*element.matrix_uchar);
|
||||
//case viennacl::scheduler::SHORT_TYPE: return fun(*element.matrix_short);
|
||||
//case viennacl::scheduler::USHORT_TYPE: return fun(*element.matrix_ushort);
|
||||
case viennacl::scheduler::INT_TYPE: return fun(*element.matrix_int);
|
||||
case viennacl::scheduler::UINT_TYPE: return fun(*element.matrix_uint);
|
||||
case viennacl::scheduler::LONG_TYPE: return fun(*element.matrix_long);
|
||||
case viennacl::scheduler::ULONG_TYPE: return fun(*element.matrix_ulong);
|
||||
case viennacl::scheduler::FLOAT_TYPE : return fun(*element.matrix_float);
|
||||
case viennacl::scheduler::DOUBLE_TYPE : return fun(*element.matrix_double);
|
||||
default : throw generator_not_supported_exception("Unsupported Scalartype");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class Fun>
|
||||
static typename Fun::result_type call_on_implicit_matrix(viennacl::scheduler::lhs_rhs_element element, Fun const & fun){
|
||||
assert(element.subtype == viennacl::scheduler::IMPLICIT_MATRIX_TYPE && bool("Must be called on a implicit matrix"));
|
||||
switch (element.numeric_type)
|
||||
{
|
||||
//case viennacl::scheduler::CHAR_TYPE: return fun(*element.implicit_matrix_char);
|
||||
//case viennacl::scheduler::UCHAR_TYPE: return fun(*element.implicit_matrix_uchar);
|
||||
//case viennacl::scheduler::SHORT_TYPE: return fun(*element.implicit_matrix_short);
|
||||
//case viennacl::scheduler::USHORT_TYPE: return fun(*element.implicit_matrix_ushort);
|
||||
case viennacl::scheduler::INT_TYPE: return fun(*element.implicit_matrix_int);
|
||||
case viennacl::scheduler::UINT_TYPE: return fun(*element.implicit_matrix_uint);
|
||||
case viennacl::scheduler::LONG_TYPE: return fun(*element.implicit_matrix_long);
|
||||
case viennacl::scheduler::ULONG_TYPE: return fun(*element.implicit_matrix_ulong);
|
||||
case viennacl::scheduler::FLOAT_TYPE : return fun(*element.implicit_matrix_float);
|
||||
case viennacl::scheduler::DOUBLE_TYPE : return fun(*element.implicit_matrix_double);
|
||||
default : throw generator_not_supported_exception("Unsupported Scalartype");
|
||||
}
|
||||
}
|
||||
|
||||
template<class Fun>
|
||||
static typename Fun::result_type call_on_element(viennacl::scheduler::lhs_rhs_element const & element, Fun const & fun){
|
||||
switch (element.type_family)
|
||||
{
|
||||
case viennacl::scheduler::SCALAR_TYPE_FAMILY:
|
||||
if (element.subtype == viennacl::scheduler::HOST_SCALAR_TYPE)
|
||||
return call_on_host_scalar(element, fun);
|
||||
else
|
||||
return call_on_scalar(element, fun);
|
||||
case viennacl::scheduler::VECTOR_TYPE_FAMILY :
|
||||
if (element.subtype == viennacl::scheduler::IMPLICIT_VECTOR_TYPE)
|
||||
return call_on_implicit_vector(element, fun);
|
||||
else
|
||||
return call_on_vector(element, fun);
|
||||
case viennacl::scheduler::MATRIX_TYPE_FAMILY:
|
||||
if (element.subtype == viennacl::scheduler::IMPLICIT_MATRIX_TYPE)
|
||||
return call_on_implicit_matrix(element, fun);
|
||||
else
|
||||
return call_on_matrix(element,fun);
|
||||
default:
|
||||
throw generator_not_supported_exception("Unsupported datastructure type : Not among {Scalar, Vector, Matrix}");
|
||||
}
|
||||
}
|
||||
|
||||
struct scalartype_size_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
result_type operator()(float const &) const { return sizeof(float); }
|
||||
result_type operator()(double const &) const { return sizeof(double); }
|
||||
template<class T> result_type operator()(T const &) const { return sizeof(typename viennacl::result_of::cpu_value_type<T>::type); }
|
||||
};
|
||||
|
||||
struct internal_size_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T> result_type operator()(T const &t) const { return viennacl::traits::internal_size(t); }
|
||||
};
|
||||
|
||||
struct size_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T> result_type operator()(T const &t) const { return viennacl::traits::size(t); }
|
||||
};
|
||||
|
||||
struct start_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T> result_type operator()(T const &t) const { return viennacl::traits::start(t); }
|
||||
};
|
||||
|
||||
|
||||
struct stride_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T> result_type operator()(T const &t) const { return viennacl::traits::stride(t); }
|
||||
};
|
||||
|
||||
struct start1_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T> result_type operator()(T const &t) const { return viennacl::traits::start1(t); }
|
||||
};
|
||||
|
||||
struct start2_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T> result_type operator()(T const &t) const { return viennacl::traits::start2(t); }
|
||||
};
|
||||
|
||||
struct leading_stride_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T> result_type operator()(T const &t) const { return viennacl::traits::stride1(t); }
|
||||
};
|
||||
|
||||
struct leading_start_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T> result_type operator()(T const &t) const { return viennacl::traits::start1(t); }
|
||||
};
|
||||
|
||||
struct stride1_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T> result_type operator()(T const &t) const { return viennacl::traits::stride1(t); }
|
||||
};
|
||||
|
||||
struct stride2_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T> result_type operator()(T const &t) const { return viennacl::traits::stride2(t); }
|
||||
};
|
||||
|
||||
struct handle_fun
|
||||
{
|
||||
typedef cl_mem result_type;
|
||||
template<class T>
|
||||
result_type operator()(T const &t) const { return viennacl::traits::opencl_handle(t); }
|
||||
};
|
||||
|
||||
struct internal_size1_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T>
|
||||
result_type operator()(T const &t) const { return viennacl::traits::internal_size1(t); }
|
||||
};
|
||||
|
||||
struct internal_size2_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T>
|
||||
result_type operator()(T const &t) const { return viennacl::traits::internal_size2(t); }
|
||||
};
|
||||
|
||||
struct size1_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T>
|
||||
result_type operator()(T const &t) const { return viennacl::traits::size1(t); }
|
||||
};
|
||||
|
||||
struct size2_fun
|
||||
{
|
||||
typedef atidlas_int_t result_type;
|
||||
template<class T>
|
||||
result_type operator()(T const &t) const { return viennacl::traits::size2(t); }
|
||||
};
|
||||
|
||||
template<class T, class U>
|
||||
struct is_same_type { enum { value = 0 }; };
|
||||
|
||||
template<class T>
|
||||
struct is_same_type<T,T> { enum { value = 1 }; };
|
||||
|
||||
inline bool is_reduction(viennacl::scheduler::statement_node const & node)
|
||||
inline bool is_reduction(scheduler::statement_node const & node)
|
||||
{
|
||||
return node.op.type_family==viennacl::scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY
|
||||
|| node.op.type_family==viennacl::scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY
|
||||
|| node.op.type_family==viennacl::scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY
|
||||
|| node.op.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE
|
||||
|| node.op.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE;
|
||||
return node.op.type_family==scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY
|
||||
|| node.op.type_family==scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY
|
||||
|| node.op.type_family==scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY
|
||||
|| node.op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE
|
||||
|| node.op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE;
|
||||
}
|
||||
|
||||
inline bool is_index_reduction(viennacl::scheduler::op_element const & op)
|
||||
inline bool is_index_reduction(scheduler::op_element const & op)
|
||||
{
|
||||
return op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE
|
||||
|| op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE
|
||||
|| op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE
|
||||
|| op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE;
|
||||
return op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE
|
||||
|| op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE
|
||||
|| op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE
|
||||
|| op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE;
|
||||
}
|
||||
template<class T>
|
||||
struct type_to_string;
|
||||
@@ -434,9 +113,9 @@ private:
|
||||
std::ostringstream oss;
|
||||
};
|
||||
|
||||
inline bool node_leaf(viennacl::scheduler::op_element const & op)
|
||||
inline bool node_leaf(scheduler::op_element const & op)
|
||||
{
|
||||
using namespace viennacl::scheduler;
|
||||
using namespace scheduler;
|
||||
return op.type==OPERATION_UNARY_NORM_1_TYPE
|
||||
|| op.type==OPERATION_UNARY_NORM_2_TYPE
|
||||
|| op.type==OPERATION_UNARY_NORM_INF_TYPE
|
||||
@@ -453,9 +132,9 @@ inline bool node_leaf(viennacl::scheduler::op_element const & op)
|
||||
|| op.type_family==OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY;
|
||||
}
|
||||
|
||||
inline bool elementwise_operator(viennacl::scheduler::op_element const & op)
|
||||
inline bool elementwise_operator(scheduler::op_element const & op)
|
||||
{
|
||||
using namespace viennacl::scheduler;
|
||||
using namespace scheduler;
|
||||
return op.type== OPERATION_BINARY_ASSIGN_TYPE
|
||||
|| op.type== OPERATION_BINARY_INPLACE_ADD_TYPE
|
||||
|| op.type== OPERATION_BINARY_INPLACE_SUB_TYPE
|
||||
@@ -467,9 +146,9 @@ inline bool elementwise_operator(viennacl::scheduler::op_element const & op)
|
||||
|| op.type== OPERATION_BINARY_DIV_TYPE;
|
||||
}
|
||||
|
||||
inline bool elementwise_function(viennacl::scheduler::op_element const & op)
|
||||
inline bool elementwise_function(scheduler::op_element const & op)
|
||||
{
|
||||
using namespace viennacl::scheduler;
|
||||
using namespace scheduler;
|
||||
return
|
||||
|
||||
op.type == OPERATION_UNARY_CAST_CHAR_TYPE
|
||||
@@ -516,37 +195,13 @@ inline bool elementwise_function(viennacl::scheduler::op_element const & op)
|
||||
|
||||
}
|
||||
|
||||
inline viennacl::scheduler::lhs_rhs_element & lhs_rhs_element(viennacl::scheduler::statement const & st, atidlas_int_t idx, leaf_t leaf)
|
||||
inline scheduler::lhs_rhs_element & lhs_rhs_element(scheduler::statement const & st, atidlas_int_t idx, leaf_t leaf)
|
||||
{
|
||||
using namespace tools;
|
||||
assert(leaf==LHS_NODE_TYPE || leaf==RHS_NODE_TYPE);
|
||||
if (leaf==LHS_NODE_TYPE)
|
||||
return const_cast<viennacl::scheduler::lhs_rhs_element &>(st.array()[idx].lhs);
|
||||
return const_cast<viennacl::scheduler::lhs_rhs_element &>(st.array()[idx].rhs);
|
||||
}
|
||||
|
||||
inline unsigned int size_of(viennacl::scheduler::statement_node_numeric_type type)
|
||||
{
|
||||
using namespace viennacl::scheduler;
|
||||
switch (type)
|
||||
{
|
||||
case UCHAR_TYPE:
|
||||
case CHAR_TYPE: return 1;
|
||||
|
||||
case USHORT_TYPE:
|
||||
case SHORT_TYPE:
|
||||
case HALF_TYPE: return 2;
|
||||
|
||||
case UINT_TYPE:
|
||||
case INT_TYPE:
|
||||
case FLOAT_TYPE: return 4;
|
||||
|
||||
case ULONG_TYPE:
|
||||
case LONG_TYPE:
|
||||
case DOUBLE_TYPE: return 8;
|
||||
|
||||
default: throw generator_not_supported_exception("Unsupported scalartype");
|
||||
}
|
||||
return const_cast<scheduler::lhs_rhs_element &>(st.array()[idx].lhs);
|
||||
return const_cast<scheduler::lhs_rhs_element &>(st.array()[idx].rhs);
|
||||
}
|
||||
|
||||
inline std::string append_width(std::string const & str, unsigned int width)
|
||||
@@ -584,6 +239,20 @@ private:
|
||||
typedef create_map<std::multimap<std::string, std::string> > create_process_accessors;
|
||||
typedef create_map<std::map<std::string, std::string> > create_evaluate_accessors;
|
||||
|
||||
template <typename T>
|
||||
class make_vector {
|
||||
public:
|
||||
typedef make_vector<T> my_type;
|
||||
my_type& operator<< (const T& val) {
|
||||
data_.push_back(val);
|
||||
return *this;
|
||||
}
|
||||
operator std::vector<T>() const {
|
||||
return data_;
|
||||
}
|
||||
private:
|
||||
std::vector<T> data_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
@@ -5,9 +5,7 @@
|
||||
#include <set>
|
||||
#include "CL/cl.h"
|
||||
|
||||
#include "viennacl/forwards.h"
|
||||
#include "viennacl/scheduler/forwards.h"
|
||||
|
||||
#include "atidlas/scheduler/forwards.h"
|
||||
#include "atidlas/backend/mapped_objects.hpp"
|
||||
#include "atidlas/backend/tools/misc.hpp"
|
||||
#include "atidlas/forwards.h"
|
||||
@@ -22,15 +20,15 @@ namespace tools
|
||||
class traversal_functor
|
||||
{
|
||||
public:
|
||||
void call_before_expansion(viennacl::scheduler::statement const &, atidlas_int_t) const { }
|
||||
void call_after_expansion(viennacl::scheduler::statement const &, atidlas_int_t) const { }
|
||||
void call_before_expansion(scheduler::statement const &, atidlas_int_t) const { }
|
||||
void call_after_expansion(scheduler::statement const &, atidlas_int_t) const { }
|
||||
};
|
||||
|
||||
/** @brief Recursively execute a functor on a statement */
|
||||
template<class Fun>
|
||||
inline void traverse(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, Fun const & fun, bool inspect)
|
||||
inline void traverse(scheduler::statement const & statement, atidlas_int_t root_idx, Fun const & fun, bool inspect)
|
||||
{
|
||||
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
bool recurse = tools::node_leaf(root_node.op)?inspect:true;
|
||||
|
||||
fun.call_before_expansion(statement, root_idx);
|
||||
@@ -38,9 +36,9 @@ inline void traverse(viennacl::scheduler::statement const & statement, atidlas_i
|
||||
//Lhs:
|
||||
if (recurse)
|
||||
{
|
||||
if (root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
traverse(statement, root_node.lhs.node_index, fun, inspect);
|
||||
if (root_node.lhs.type_family != viennacl::scheduler::INVALID_TYPE_FAMILY)
|
||||
if (root_node.lhs.type_family != scheduler::INVALID_TYPE_FAMILY)
|
||||
fun(statement, root_idx, LHS_NODE_TYPE);
|
||||
}
|
||||
|
||||
@@ -48,11 +46,11 @@ inline void traverse(viennacl::scheduler::statement const & statement, atidlas_i
|
||||
fun(statement, root_idx, PARENT_NODE_TYPE);
|
||||
|
||||
//Rhs:
|
||||
if (recurse && root_node.rhs.type_family!=viennacl::scheduler::INVALID_TYPE_FAMILY)
|
||||
if (recurse && root_node.rhs.type_family!=scheduler::INVALID_TYPE_FAMILY)
|
||||
{
|
||||
if (root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
traverse(statement, root_node.rhs.node_index, fun, inspect);
|
||||
if (root_node.rhs.type_family != viennacl::scheduler::INVALID_TYPE_FAMILY)
|
||||
if (root_node.rhs.type_family != scheduler::INVALID_TYPE_FAMILY)
|
||||
fun(statement, root_idx, RHS_NODE_TYPE);
|
||||
}
|
||||
|
||||
@@ -62,13 +60,13 @@ inline void traverse(viennacl::scheduler::statement const & statement, atidlas_i
|
||||
class filter_fun : public traversal_functor
|
||||
{
|
||||
public:
|
||||
typedef bool (*pred_t)(viennacl::scheduler::statement_node const & node);
|
||||
typedef bool (*pred_t)(scheduler::statement_node const & node);
|
||||
|
||||
filter_fun(pred_t pred, std::vector<size_t> & out) : pred_(pred), out_(out){ }
|
||||
|
||||
void operator()(viennacl::scheduler::statement const & statement, size_t root_idx, leaf_t) const
|
||||
void operator()(scheduler::statement const & statement, size_t root_idx, leaf_t) const
|
||||
{
|
||||
viennacl::scheduler::statement_node const * root_node = &statement.array()[root_idx];
|
||||
scheduler::statement_node const * root_node = &statement.array()[root_idx];
|
||||
if (pred_(*root_node))
|
||||
out_.push_back(root_idx);
|
||||
}
|
||||
@@ -77,7 +75,7 @@ private:
|
||||
std::vector<size_t> & out_;
|
||||
};
|
||||
|
||||
inline std::vector<size_t> filter_nodes(bool (*pred)(viennacl::scheduler::statement_node const & node), viennacl::scheduler::statement const & statement, bool inspect)
|
||||
inline std::vector<size_t> filter_nodes(bool (*pred)(scheduler::statement_node const & node), scheduler::statement const & statement, bool inspect)
|
||||
{
|
||||
std::vector<size_t> res;
|
||||
tools::traverse(statement, statement.root(), filter_fun(pred, res), inspect);
|
||||
@@ -87,32 +85,32 @@ inline std::vector<size_t> filter_nodes(bool (*pred)(viennacl::scheduler::statem
|
||||
class filter_elements_fun : public traversal_functor
|
||||
{
|
||||
public:
|
||||
filter_elements_fun(viennacl::scheduler::statement_node_subtype subtype, std::vector<viennacl::scheduler::lhs_rhs_element> & out) : subtype_(subtype), out_(out) { }
|
||||
filter_elements_fun(scheduler::statement_node_subtype subtype, std::vector<scheduler::lhs_rhs_element> & out) : subtype_(subtype), out_(out) { }
|
||||
|
||||
void operator()(viennacl::scheduler::statement const & statement, size_t root_idx, leaf_t) const
|
||||
void operator()(scheduler::statement const & statement, size_t root_idx, leaf_t) const
|
||||
{
|
||||
viennacl::scheduler::statement_node const * root_node = &statement.array()[root_idx];
|
||||
scheduler::statement_node const * root_node = &statement.array()[root_idx];
|
||||
if (root_node->lhs.subtype==subtype_)
|
||||
out_.push_back(root_node->lhs);
|
||||
if (root_node->rhs.subtype==subtype_)
|
||||
out_.push_back(root_node->rhs);
|
||||
}
|
||||
private:
|
||||
viennacl::scheduler::statement_node_subtype subtype_;
|
||||
std::vector<viennacl::scheduler::lhs_rhs_element> & out_;
|
||||
scheduler::statement_node_subtype subtype_;
|
||||
std::vector<scheduler::lhs_rhs_element> & out_;
|
||||
};
|
||||
|
||||
inline std::vector<viennacl::scheduler::lhs_rhs_element> filter_elements(viennacl::scheduler::statement_node_subtype subtype, viennacl::scheduler::statement const & statement)
|
||||
inline std::vector<scheduler::lhs_rhs_element> filter_elements(scheduler::statement_node_subtype subtype, scheduler::statement const & statement)
|
||||
{
|
||||
std::vector<viennacl::scheduler::lhs_rhs_element> res;
|
||||
std::vector<scheduler::lhs_rhs_element> res;
|
||||
tools::traverse(statement, statement.root(), filter_elements_fun(subtype, res), true);
|
||||
return res;
|
||||
}
|
||||
|
||||
/** @brief generate a string from an operation_node_type */
|
||||
inline const char * evaluate(viennacl::scheduler::operation_node_type type)
|
||||
inline const char * evaluate(scheduler::operation_node_type type)
|
||||
{
|
||||
using namespace viennacl::scheduler;
|
||||
using namespace scheduler;
|
||||
// unary expression
|
||||
switch (type)
|
||||
{
|
||||
@@ -194,9 +192,9 @@ inline const char * evaluate(viennacl::scheduler::operation_node_type type)
|
||||
}
|
||||
}
|
||||
|
||||
inline const char * operator_string(viennacl::scheduler::operation_node_type type)
|
||||
inline const char * operator_string(scheduler::operation_node_type type)
|
||||
{
|
||||
using namespace viennacl::scheduler;
|
||||
using namespace scheduler;
|
||||
switch (type)
|
||||
{
|
||||
case OPERATION_UNARY_CAST_CHAR_TYPE : return "char";
|
||||
@@ -237,24 +235,24 @@ private:
|
||||
public:
|
||||
evaluate_expression_traversal(std::map<std::string, std::string> const & accessors, std::string & str, mapping_type const & mapping) : accessors_(accessors), str_(str), mapping_(mapping){ }
|
||||
|
||||
void call_before_expansion(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx) const
|
||||
void call_before_expansion(scheduler::statement const & statement, atidlas_int_t root_idx) const
|
||||
{
|
||||
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
if ((root_node.op.type_family==viennacl::scheduler::OPERATION_UNARY_TYPE_FAMILY || tools::elementwise_function(root_node.op))
|
||||
scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
if ((root_node.op.type_family==scheduler::OPERATION_UNARY_TYPE_FAMILY || tools::elementwise_function(root_node.op))
|
||||
&& !tools::node_leaf(root_node.op))
|
||||
str_+=tools::evaluate(root_node.op.type);
|
||||
str_+="(";
|
||||
|
||||
}
|
||||
|
||||
void call_after_expansion(viennacl::scheduler::statement const & /*statement*/, atidlas_int_t /*root_idx*/) const
|
||||
void call_after_expansion(scheduler::statement const & /*statement*/, atidlas_int_t /*root_idx*/) const
|
||||
{
|
||||
str_+=")";
|
||||
}
|
||||
|
||||
void operator()(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf) const
|
||||
void operator()(scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf) const
|
||||
{
|
||||
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
mapping_type::key_type key = std::make_pair(root_idx, leaf);
|
||||
if (leaf==PARENT_NODE_TYPE)
|
||||
{
|
||||
@@ -262,20 +260,20 @@ public:
|
||||
str_ += mapping_.at(key)->evaluate(accessors_);
|
||||
else if (tools::elementwise_operator(root_node.op))
|
||||
str_ += tools::evaluate(root_node.op.type);
|
||||
else if (root_node.op.type_family!=viennacl::scheduler::OPERATION_UNARY_TYPE_FAMILY && tools::elementwise_function(root_node.op))
|
||||
else if (root_node.op.type_family!=scheduler::OPERATION_UNARY_TYPE_FAMILY && tools::elementwise_function(root_node.op))
|
||||
str_ += ",";
|
||||
}
|
||||
else
|
||||
{
|
||||
if (leaf==LHS_NODE_TYPE)
|
||||
{
|
||||
if (root_node.lhs.type_family!=viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
if (root_node.lhs.type_family!=scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
str_ += mapping_.at(key)->evaluate(accessors_);
|
||||
}
|
||||
|
||||
if (leaf==RHS_NODE_TYPE)
|
||||
{
|
||||
if (root_node.rhs.type_family!=viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
if (root_node.rhs.type_family!=scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
str_ += mapping_.at(key)->evaluate(accessors_);
|
||||
}
|
||||
}
|
||||
@@ -283,22 +281,22 @@ public:
|
||||
};
|
||||
|
||||
inline std::string evaluate(leaf_t leaf, std::map<std::string, std::string> const & accessors,
|
||||
viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, mapping_type const & mapping)
|
||||
scheduler::statement const & statement, atidlas_int_t root_idx, mapping_type const & mapping)
|
||||
{
|
||||
std::string res;
|
||||
evaluate_expression_traversal traversal_functor(accessors, res, mapping);
|
||||
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
|
||||
if (leaf==RHS_NODE_TYPE)
|
||||
{
|
||||
if (root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
tools::traverse(statement, root_node.rhs.node_index, traversal_functor, false);
|
||||
else
|
||||
traversal_functor(statement, root_idx, leaf);
|
||||
}
|
||||
else if (leaf==LHS_NODE_TYPE)
|
||||
{
|
||||
if (root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
tools::traverse(statement, root_node.lhs.node_index, traversal_functor, false);
|
||||
else
|
||||
traversal_functor(statement, root_idx, leaf);
|
||||
@@ -310,9 +308,9 @@ inline std::string evaluate(leaf_t leaf, std::map<std::string, std::string> cons
|
||||
}
|
||||
|
||||
inline void evaluate(tools::kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors,
|
||||
statements_container const & statements, std::vector<mapping_type> const & mappings)
|
||||
scheduler::statements_container const & statements, std::vector<mapping_type> const & mappings)
|
||||
{
|
||||
statements_container::data_type::const_iterator sit;
|
||||
scheduler::statements_container::data_type::const_iterator sit;
|
||||
std::vector<mapping_type>::const_iterator mit;
|
||||
|
||||
for (mit = mappings.begin(), sit = statements.data().begin(); sit != statements.data().end(); ++mit, ++sit)
|
||||
@@ -327,7 +325,7 @@ public:
|
||||
process_traversal(std::multimap<std::string, std::string> const & accessors, tools::kernel_generation_stream & stream,
|
||||
mapping_type const & mapping, std::set<std::string> & already_processed) : accessors_(accessors), stream_(stream), mapping_(mapping), already_processed_(already_processed){ }
|
||||
|
||||
void operator()(viennacl::scheduler::statement const & /*statement*/, atidlas_int_t root_idx, leaf_t leaf) const
|
||||
void operator()(scheduler::statement const & /*statement*/, atidlas_int_t root_idx, leaf_t leaf) const
|
||||
{
|
||||
mapping_type::const_iterator it = mapping_.find(std::make_pair(root_idx, leaf));
|
||||
if (it!=mapping_.end())
|
||||
@@ -353,21 +351,21 @@ private:
|
||||
};
|
||||
|
||||
inline void process(tools::kernel_generation_stream & stream, leaf_t leaf, std::multimap<std::string, std::string> const & accessors,
|
||||
viennacl::scheduler::statement const & statement, size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed)
|
||||
scheduler::statement const & statement, size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed)
|
||||
{
|
||||
process_traversal traversal_functor(accessors, stream, mapping, already_processed);
|
||||
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
|
||||
if (leaf==RHS_NODE_TYPE)
|
||||
{
|
||||
if (root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
tools::traverse(statement, root_node.rhs.node_index, traversal_functor, true);
|
||||
else
|
||||
traversal_functor(statement, root_idx, leaf);
|
||||
}
|
||||
else if (leaf==LHS_NODE_TYPE)
|
||||
{
|
||||
if (root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
tools::traverse(statement, root_node.lhs.node_index, traversal_functor, true);
|
||||
else
|
||||
traversal_functor(statement, root_idx, leaf);
|
||||
@@ -379,9 +377,9 @@ inline void process(tools::kernel_generation_stream & stream, leaf_t leaf, std::
|
||||
}
|
||||
|
||||
inline void process(tools::kernel_generation_stream & stream, leaf_t leaf, std::multimap<std::string, std::string> const & accessors,
|
||||
statements_container const & statements, std::vector<mapping_type> const & mappings)
|
||||
scheduler::statements_container const & statements, std::vector<mapping_type> const & mappings)
|
||||
{
|
||||
statements_container::data_type::const_iterator sit;
|
||||
scheduler::statements_container::data_type::const_iterator sit;
|
||||
std::vector<mapping_type>::const_iterator mit;
|
||||
std::set<std::string> already_processed;
|
||||
|
||||
@@ -404,78 +402,83 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
typedef void result_type;
|
||||
/** @brief Vector mapping */
|
||||
inline void append(atidlas::vector_base const & vec) const
|
||||
{
|
||||
*ptr_++='v'; //vector
|
||||
*ptr_++=(char)vec.dtype();
|
||||
append_id(ptr_, binder_.get(&vec.data()));
|
||||
}
|
||||
|
||||
inline void append(scheduler::lhs_rhs_element const & lhs_rhs) const
|
||||
{
|
||||
if(lhs_rhs.subtype==scheduler::DENSE_VECTOR_TYPE)
|
||||
append(*lhs_rhs.vector);
|
||||
}
|
||||
|
||||
public:
|
||||
statement_representation_functor(symbolic_binder & binder, char *& ptr) : binder_(binder), ptr_(ptr){ }
|
||||
|
||||
template<class NumericT>
|
||||
inline result_type operator()(NumericT const & /*scal*/) const
|
||||
{
|
||||
*ptr_++='h'; //host
|
||||
*ptr_++='s'; //scalar
|
||||
*ptr_++=tools::first_letter_of_type<NumericT>::value();
|
||||
}
|
||||
// template<class NumericT>
|
||||
// inline result_type operator()(NumericT const & /*scal*/) const
|
||||
// {
|
||||
// *ptr_++='h'; //host
|
||||
// *ptr_++='s'; //scalar
|
||||
// *ptr_++=tools::first_letter_of_type<NumericT>::value();
|
||||
// }
|
||||
|
||||
/** @brief Scalar mapping */
|
||||
template<class NumericT>
|
||||
inline result_type operator()(viennacl::scalar<NumericT> const & scal) const
|
||||
{
|
||||
*ptr_++='s'; //scalar
|
||||
*ptr_++=tools::first_letter_of_type<NumericT>::value();
|
||||
append_id(ptr_, binder_.get(&viennacl::traits::handle(scal)));
|
||||
}
|
||||
// /** @brief Scalar mapping */
|
||||
// template<class NumericT>
|
||||
// inline result_type operator()(viennacl::scalar<NumericT> const & scal) const
|
||||
// {
|
||||
// *ptr_++='s'; //scalar
|
||||
// *ptr_++=tools::first_letter_of_type<NumericT>::value();
|
||||
// append_id(ptr_, binder_.get(&viennacl::traits::handle(scal)));
|
||||
// }
|
||||
|
||||
/** @brief Vector mapping */
|
||||
template<class NumericT>
|
||||
inline result_type operator()(viennacl::vector_base<NumericT> const & vec) const
|
||||
{
|
||||
*ptr_++='v'; //vector
|
||||
*ptr_++=tools::first_letter_of_type<NumericT>::value();
|
||||
append_id(ptr_, binder_.get(&viennacl::traits::handle(vec)));
|
||||
}
|
||||
|
||||
/** @brief Implicit vector mapping */
|
||||
template<class NumericT>
|
||||
inline result_type operator()(viennacl::implicit_vector_base<NumericT> const & /*vec*/) const
|
||||
{
|
||||
*ptr_++='i'; //implicit
|
||||
*ptr_++='v'; //vector
|
||||
*ptr_++=tools::first_letter_of_type<NumericT>::value();
|
||||
}
|
||||
|
||||
/** @brief Matrix mapping */
|
||||
template<class NumericT>
|
||||
inline result_type operator()(viennacl::matrix_base<NumericT> const & mat) const
|
||||
{
|
||||
*ptr_++='m'; //Matrix
|
||||
*ptr_++=tools::first_letter_of_type<NumericT>::value();
|
||||
append_id(ptr_, binder_.get(&viennacl::traits::handle(mat)));
|
||||
}
|
||||
// /** @brief Implicit vector mapping */
|
||||
// template<class NumericT>
|
||||
// inline result_type operator()(viennacl::implicit_vector_base<NumericT> const & /*vec*/) const
|
||||
// {
|
||||
// *ptr_++='i'; //implicit
|
||||
// *ptr_++='v'; //vector
|
||||
// *ptr_++=tools::first_letter_of_type<NumericT>::value();
|
||||
// }
|
||||
|
||||
/** @brief Implicit matrix mapping */
|
||||
template<class NumericT>
|
||||
inline result_type operator()(viennacl::implicit_matrix_base<NumericT> const & /*mat*/) const
|
||||
{
|
||||
*ptr_++='i'; //implicit
|
||||
*ptr_++='m'; //matrix
|
||||
*ptr_++=tools::first_letter_of_type<NumericT>::value();
|
||||
}
|
||||
// /** @brief Matrix mapping */
|
||||
// template<class NumericT>
|
||||
// inline result_type operator()(viennacl::matrix_base<NumericT> const & mat) const
|
||||
// {
|
||||
// *ptr_++='m'; //Matrix
|
||||
// *ptr_++=tools::first_letter_of_type<NumericT>::value();
|
||||
// append_id(ptr_, binder_.get(&viennacl::traits::handle(mat)));
|
||||
// }
|
||||
|
||||
static inline void append(char*& p, const char * str)
|
||||
// /** @brief Implicit matrix mapping */
|
||||
// template<class NumericT>
|
||||
// inline result_type operator()(viennacl::implicit_matrix_base<NumericT> const & /*mat*/) const
|
||||
// {
|
||||
// *ptr_++='i'; //implicit
|
||||
// *ptr_++='m'; //matrix
|
||||
// *ptr_++=tools::first_letter_of_type<NumericT>::value();
|
||||
// }
|
||||
|
||||
inline void append(char*& p, const char * str) const
|
||||
{
|
||||
std::size_t n = std::strlen(str);
|
||||
std::memcpy(p, str, n);
|
||||
p+=n;
|
||||
}
|
||||
|
||||
inline void operator()(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const
|
||||
inline void operator()(scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const
|
||||
{
|
||||
viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
tools::call_on_element(root_node.lhs, *this);
|
||||
else if (root_node.op.type_family==viennacl::scheduler::OPERATION_BINARY_TYPE_FAMILY && leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
tools::call_on_element(root_node.rhs, *this);
|
||||
scheduler::statement_node const & root_node = statement.array()[root_idx];
|
||||
if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
append(root_node.lhs);
|
||||
else if (root_node.op.type_family==scheduler::OPERATION_BINARY_TYPE_FAMILY && leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
|
||||
append(root_node.rhs);
|
||||
else if (leaf_t==PARENT_NODE_TYPE)
|
||||
append_id(ptr_,root_node.op.type);
|
||||
}
|
||||
@@ -485,16 +488,16 @@ private:
|
||||
char *& ptr_;
|
||||
};
|
||||
|
||||
inline std::string statements_representation(statements_container const & statements, binding_policy_t binding_policy)
|
||||
inline std::string statements_representation(scheduler::statements_container const & statements, binding_policy_t binding_policy)
|
||||
{
|
||||
std::vector<char> program_name_vector(256);
|
||||
char* program_name = program_name_vector.data();
|
||||
if (statements.order()==statements_container::INDEPENDENT)
|
||||
if (statements.order()==scheduler::statements_container::INDEPENDENT)
|
||||
*program_name++='i';
|
||||
else
|
||||
*program_name++='s';
|
||||
tools::shared_ptr<symbolic_binder> binder = make_binder(binding_policy);
|
||||
for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
|
||||
for (scheduler::statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
|
||||
tools::traverse(*it, it->root(), tools::statement_representation_functor(*binder, program_name),true);
|
||||
*program_name='\0';
|
||||
return std::string(program_name_vector.data());
|
||||
|
17
atidlas/detail/vector_def.hpp
Normal file
17
atidlas/detail/vector_def.hpp
Normal file
@@ -0,0 +1,17 @@
|
||||
#ifndef ATIDLAS_VECTOR_HPP_
|
||||
#define ATIDLAS_VECTOR_HPP_
|
||||
|
||||
#include "CL/cl.hpp"
|
||||
|
||||
#include "atidlas/forwards.h"
|
||||
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
80
atidlas/expression_template.hpp
Normal file
80
atidlas/expression_template.hpp
Normal file
@@ -0,0 +1,80 @@
|
||||
#ifndef ATIDLAS_EXPRESSION_TEMPLATE_HPP_
|
||||
#define ATIDLAS_EXPRESSION_TEMPLATE_HPP_
|
||||
|
||||
#include "atidlas/forwards.h"
|
||||
#include "atidlas/traits/size.hpp"
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
template<typename T>
|
||||
struct reference_if_nonscalar
|
||||
{
|
||||
typedef T & type;
|
||||
};
|
||||
|
||||
#define ATIDLAS_REFERENCE_IF_NONSCALAR(TNAME) \
|
||||
template<> struct reference_if_nonscalar<TNAME> { typedef TNAME type; }; \
|
||||
template<> struct reference_if_nonscalar<const TNAME> { typedef const TNAME type; };
|
||||
|
||||
ATIDLAS_REFERENCE_IF_NONSCALAR(char)
|
||||
ATIDLAS_REFERENCE_IF_NONSCALAR(short)
|
||||
ATIDLAS_REFERENCE_IF_NONSCALAR(int)
|
||||
ATIDLAS_REFERENCE_IF_NONSCALAR(long)
|
||||
ATIDLAS_REFERENCE_IF_NONSCALAR(unsigned char)
|
||||
ATIDLAS_REFERENCE_IF_NONSCALAR(unsigned short)
|
||||
ATIDLAS_REFERENCE_IF_NONSCALAR(unsigned int)
|
||||
ATIDLAS_REFERENCE_IF_NONSCALAR(unsigned long)
|
||||
|
||||
ATIDLAS_REFERENCE_IF_NONSCALAR(float)
|
||||
ATIDLAS_REFERENCE_IF_NONSCALAR(double)
|
||||
#undef ATIDLAS_REFERENCE_IF_NONSCALAR
|
||||
|
||||
}
|
||||
|
||||
/** @brief An expression template class that represents a binary operation
|
||||
* @tparam LHS left hand side operand
|
||||
* @tparam RHS right hand side operand
|
||||
* @tparam OP the operator
|
||||
*/
|
||||
template<typename LHS, typename RHS, typename OP>
|
||||
class expression_template
|
||||
{
|
||||
typedef typename detail::reference_if_nonscalar<LHS>::type lhs_reference_type;
|
||||
typedef typename detail::reference_if_nonscalar<RHS>::type rhs_reference_type;
|
||||
public:
|
||||
expression_template(LHS & l, RHS & r) : lhs_(l), rhs_(r) {}
|
||||
/** @brief Get left hand side operand */
|
||||
lhs_reference_type lhs() const { return lhs_; }
|
||||
/** @brief Get right hand side operand */
|
||||
rhs_reference_type rhs() const { return rhs_; }
|
||||
/** @brief Returns the size of the result vector */
|
||||
atidlas_int_t size() const { return traits::size(*this); }
|
||||
private:
|
||||
/** @brief The left hand side operand */
|
||||
lhs_reference_type lhs_;
|
||||
/** @brief The right hand side operand */
|
||||
rhs_reference_type rhs_;
|
||||
};
|
||||
|
||||
template<typename LHS, typename RHS, typename OP>
|
||||
struct vector_expression: public expression_template<LHS, RHS, OP>{
|
||||
vector_expression(LHS & l, RHS & r) : expression_template<LHS, RHS, OP>(l, r){ }
|
||||
};
|
||||
|
||||
template<typename LHS, typename RHS, typename OP>
|
||||
class matrix_expression: public expression_template<LHS, RHS, OP>{
|
||||
matrix_expression(LHS & l, RHS & r) : expression_template<LHS, RHS, OP>(l, r){ }
|
||||
};
|
||||
|
||||
template<typename LHS, typename RHS, typename OP>
|
||||
class scalar_expression: public expression_template<LHS, RHS, OP>{
|
||||
scalar_expression(LHS & l, RHS & r) : expression_template<LHS, RHS, OP>(l, r){ }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,10 +1,285 @@
|
||||
#ifndef ATIDLAS_FORWARDS_H
|
||||
#define ATIDLAS_FORWARDS_H
|
||||
|
||||
#include "CL/cl.hpp"
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
typedef int atidlas_int_t;
|
||||
|
||||
enum numeric_type
|
||||
{
|
||||
INVALID_NUMERIC_TYPE = 0,
|
||||
CHAR_TYPE,
|
||||
UCHAR_TYPE,
|
||||
SHORT_TYPE,
|
||||
USHORT_TYPE,
|
||||
INT_TYPE,
|
||||
UINT_TYPE,
|
||||
LONG_TYPE,
|
||||
ULONG_TYPE,
|
||||
HALF_TYPE,
|
||||
FLOAT_TYPE,
|
||||
DOUBLE_TYPE
|
||||
};
|
||||
|
||||
inline unsigned int size_of(numeric_type type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case UCHAR_TYPE:
|
||||
case CHAR_TYPE: return 1;
|
||||
|
||||
case USHORT_TYPE:
|
||||
case SHORT_TYPE:
|
||||
case HALF_TYPE: return 2;
|
||||
|
||||
case UINT_TYPE:
|
||||
case INT_TYPE:
|
||||
case FLOAT_TYPE: return 4;
|
||||
|
||||
case ULONG_TYPE:
|
||||
case LONG_TYPE:
|
||||
case DOUBLE_TYPE: return 8;
|
||||
|
||||
default: throw "Unsupported numeric type";
|
||||
}
|
||||
}
|
||||
|
||||
template<class LHS, class RHS, class OP>
|
||||
class vector_expression;
|
||||
|
||||
class vector_base
|
||||
{
|
||||
public:
|
||||
vector_base(atidlas_int_t size, numeric_type dtype, cl::Context context) : dtype_(dtype), size_(size), internal_size_(size), start_(0), stride_(1), context_(context), data_(context, CL_MEM_READ_WRITE, size_of(dtype_)*internal_size()) {}
|
||||
vector_base(cl::Buffer data, atidlas_int_t size, numeric_type dtype, atidlas_int_t start, atidlas_int_t stride): dtype_(dtype), size_(size), internal_size_(size), start_(start), stride_(stride), context_(data.getInfo<CL_MEM_CONTEXT>()), data_(data){ }
|
||||
|
||||
numeric_type dtype() const { return dtype_; }
|
||||
atidlas_int_t size() const { return size_; }
|
||||
atidlas_int_t internal_size() const { return internal_size_; }
|
||||
atidlas_int_t start() const { return start_; }
|
||||
atidlas_int_t stride() const { return stride_; }
|
||||
|
||||
template<class LHS, class RHS, class OP>
|
||||
vector_base& operator=(vector_expression<LHS, RHS, OP> const &);
|
||||
|
||||
cl::Context const & context() const { return context_; }
|
||||
cl::Buffer const & data() const { return data_; }
|
||||
|
||||
|
||||
private:
|
||||
numeric_type dtype_;
|
||||
|
||||
atidlas_int_t size_;
|
||||
atidlas_int_t internal_size_;
|
||||
atidlas_int_t start_;
|
||||
atidlas_int_t stride_;
|
||||
|
||||
cl::Context context_;
|
||||
cl::Buffer data_;
|
||||
};
|
||||
|
||||
class matrix_base
|
||||
{
|
||||
public:
|
||||
matrix_base(atidlas_int_t size1, atidlas_int_t size2, numeric_type dtype, cl::Context context) : dtype_(dtype), size1_(size1), internal_size1_(size1), start1_(0), stride1_(1),
|
||||
size2_(size2), internal_size2_(size2), start2_(0), stride2_(2),
|
||||
context_(context), data_(context, CL_MEM_READ_WRITE, size_of(dtype_)*internal_size()) {}
|
||||
matrix_base(cl::Buffer data, atidlas_int_t size1, atidlas_int_t start1, atidlas_int_t stride1,
|
||||
atidlas_int_t size2, atidlas_int_t start2, atidlas_int_t stride2,
|
||||
numeric_type dtype): dtype_(dtype), size1_(size1), start1_(start1), stride1_(stride1),
|
||||
size2_(size2), start2_(start2), stride2_(stride2), context_(data.getInfo<CL_MEM_CONTEXT>()), data_(data){ }
|
||||
|
||||
numeric_type dtype() const { return dtype_; }
|
||||
|
||||
atidlas_int_t size1() const { return size1_; }
|
||||
atidlas_int_t internal_size1() const { return size1_; }
|
||||
atidlas_int_t start1() const { return start1_; }
|
||||
atidlas_int_t stride1() const { return stride1_; }
|
||||
|
||||
atidlas_int_t size2() const { return size2_; }
|
||||
atidlas_int_t internal_size2() const { return size2_; }
|
||||
atidlas_int_t start2() const { return start2_; }
|
||||
atidlas_int_t stride2() const { return stride2_; }
|
||||
|
||||
atidlas_int_t internal_size() const { return internal_size1_*internal_size2_; }
|
||||
|
||||
cl::Context const & context() const { return context_; }
|
||||
cl::Buffer const & data() const { return data_; }
|
||||
|
||||
|
||||
private:
|
||||
numeric_type dtype_;
|
||||
|
||||
atidlas_int_t size1_;
|
||||
atidlas_int_t internal_size1_;
|
||||
atidlas_int_t start1_;
|
||||
atidlas_int_t stride1_;
|
||||
|
||||
atidlas_int_t size2_;
|
||||
atidlas_int_t internal_size2_;
|
||||
atidlas_int_t start2_;
|
||||
atidlas_int_t stride2_;
|
||||
|
||||
cl::Context context_;
|
||||
cl::Buffer data_;
|
||||
};
|
||||
|
||||
/** @brief A tag class representing assignment */
|
||||
struct op_assign {};
|
||||
/** @brief A tag class representing inplace addition */
|
||||
struct op_inplace_add {};
|
||||
/** @brief A tag class representing inplace subtraction */
|
||||
struct op_inplace_sub {};
|
||||
|
||||
/** @brief A tag class representing addition */
|
||||
struct op_add {};
|
||||
/** @brief A tag class representing subtraction */
|
||||
struct op_sub {};
|
||||
/** @brief A tag class representing multiplication by a scalar */
|
||||
struct op_mult {};
|
||||
/** @brief A tag class representing matrix-vector products and element-wise multiplications*/
|
||||
struct op_prod {};
|
||||
/** @brief A tag class representing matrix-matrix products */
|
||||
struct op_mat_mat_prod {};
|
||||
/** @brief A tag class representing division */
|
||||
struct op_div {};
|
||||
/** @brief A tag class representing the power function */
|
||||
struct op_pow {};
|
||||
|
||||
/** @brief A tag class representing equality */
|
||||
struct op_eq {};
|
||||
/** @brief A tag class representing inequality */
|
||||
struct op_neq {};
|
||||
/** @brief A tag class representing greater-than */
|
||||
struct op_greater {};
|
||||
/** @brief A tag class representing less-than */
|
||||
struct op_less {};
|
||||
/** @brief A tag class representing greater-than-or-equal-to */
|
||||
struct op_geq {};
|
||||
/** @brief A tag class representing less-than-or-equal-to */
|
||||
struct op_leq {};
|
||||
|
||||
template<class T>
|
||||
struct op_reduce_vector{ };
|
||||
|
||||
template<class T>
|
||||
struct op_reduce_rows{ };
|
||||
|
||||
template<class T>
|
||||
struct op_reduce_columns{ };
|
||||
|
||||
/** @brief A tag class representing element-wise casting operations on vectors and matrices */
|
||||
template<typename OP>
|
||||
struct op_element_cast {};
|
||||
|
||||
/** @brief A tag class representing element-wise binary operations (like multiplication) on vectors or matrices */
|
||||
template<typename OP>
|
||||
struct op_element_binary {};
|
||||
|
||||
/** @brief A tag class representing element-wise unary operations (like sin()) on vectors or matrices */
|
||||
template<typename OP>
|
||||
struct op_element_unary {};
|
||||
|
||||
/** @brief A tag class representing the modulus function for integers */
|
||||
struct op_abs {};
|
||||
/** @brief A tag class representing the acos() function */
|
||||
struct op_acos {};
|
||||
/** @brief A tag class representing the asin() function */
|
||||
struct op_asin {};
|
||||
/** @brief A tag class for representing the argmax() function */
|
||||
struct op_argmax {};
|
||||
/** @brief A tag class for representing the argmin() function */
|
||||
struct op_argmin {};
|
||||
/** @brief A tag class representing the atan() function */
|
||||
struct op_atan {};
|
||||
/** @brief A tag class representing the atan2() function */
|
||||
struct op_atan2 {};
|
||||
/** @brief A tag class representing the ceil() function */
|
||||
struct op_ceil {};
|
||||
/** @brief A tag class representing the cos() function */
|
||||
struct op_cos {};
|
||||
/** @brief A tag class representing the cosh() function */
|
||||
struct op_cosh {};
|
||||
/** @brief A tag class representing the exp() function */
|
||||
struct op_exp {};
|
||||
/** @brief A tag class representing the fabs() function */
|
||||
struct op_fabs {};
|
||||
/** @brief A tag class representing the fdim() function */
|
||||
struct op_fdim {};
|
||||
/** @brief A tag class representing the floor() function */
|
||||
struct op_floor {};
|
||||
/** @brief A tag class representing the fmax() function */
|
||||
struct op_fmax {};
|
||||
/** @brief A tag class representing the fmin() function */
|
||||
struct op_fmin {};
|
||||
/** @brief A tag class representing the fmod() function */
|
||||
struct op_fmod {};
|
||||
/** @brief A tag class representing the log() function */
|
||||
struct op_log {};
|
||||
/** @brief A tag class representing the log10() function */
|
||||
struct op_log10 {};
|
||||
/** @brief A tag class representing the sin() function */
|
||||
struct op_sin {};
|
||||
/** @brief A tag class representing the sinh() function */
|
||||
struct op_sinh {};
|
||||
/** @brief A tag class representing the sqrt() function */
|
||||
struct op_sqrt {};
|
||||
/** @brief A tag class representing the tan() function */
|
||||
struct op_tan {};
|
||||
/** @brief A tag class representing the tanh() function */
|
||||
struct op_tanh {};
|
||||
|
||||
/** @brief A tag class representing the (off-)diagonal of a matrix */
|
||||
struct op_matrix_diag {};
|
||||
|
||||
/** @brief A tag class representing a matrix given by a vector placed on a certain (off-)diagonal */
|
||||
struct op_vector_diag {};
|
||||
|
||||
/** @brief A tag class representing the extraction of a matrix row to a vector */
|
||||
struct op_row {};
|
||||
|
||||
/** @brief A tag class representing the extraction of a matrix column to a vector */
|
||||
struct op_column {};
|
||||
|
||||
/** @brief A tag class representing inner products of two vectors */
|
||||
struct op_inner_prod {};
|
||||
|
||||
/** @brief A tag class representing the 1-norm of a vector */
|
||||
struct op_norm_1 {};
|
||||
|
||||
/** @brief A tag class representing the 2-norm of a vector */
|
||||
struct op_norm_2 {};
|
||||
|
||||
/** @brief A tag class representing the inf-norm of a vector */
|
||||
struct op_norm_inf {};
|
||||
|
||||
/** @brief A tag class representing the maximum of a vector */
|
||||
struct op_max {};
|
||||
|
||||
/** @brief A tag class representing the minimum of a vector */
|
||||
struct op_min {};
|
||||
|
||||
|
||||
/** @brief A tag class representing the Frobenius-norm of a matrix */
|
||||
struct op_norm_frobenius {};
|
||||
|
||||
/** @brief A tag class representing transposed matrices */
|
||||
struct op_trans {};
|
||||
|
||||
/** @brief A tag class representing sign flips (for scalars only. Vectors and matrices use the standard multiplication by the scalar -1.0) */
|
||||
struct op_flip_sign {};
|
||||
|
||||
template<typename LHS, typename RHS, typename OP>
|
||||
class vector_expression;
|
||||
|
||||
template<typename LHS, typename RHS, typename OP>
|
||||
class matrix_expression;
|
||||
|
||||
template<typename LHS, typename RHS, typename OP>
|
||||
class scalar_expression;
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@@ -3,13 +3,11 @@
|
||||
|
||||
#include "rapidjson/document.h"
|
||||
|
||||
#include "viennacl/ocl/program.hpp"
|
||||
#include "viennacl/tools/timer.hpp"
|
||||
|
||||
#include "atidlas/backend/templates/template_base.hpp"
|
||||
#include "atidlas/model/tools.hpp"
|
||||
#include "atidlas/tools/shared_ptr.hpp"
|
||||
#include "atidlas/tools/lazy_program_compiler.hpp"
|
||||
#include "atidlas/backend/templates/template_base.hpp"
|
||||
#include "atidlas/tools/timer.hpp"
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
@@ -107,7 +105,7 @@ namespace atidlas
|
||||
templates_(1,tp.clone()), context_(context), device_(device)
|
||||
{}
|
||||
|
||||
void execute(statements_container const & statements, bool bypass_predictor = false, bool force_recompilation = false)
|
||||
void execute(scheduler::statements_container const & statements, bool bypass_predictor = false, bool force_recompilation = false)
|
||||
{
|
||||
bypass_predictor = bypass_predictor || predictor_.get()==NULL;
|
||||
|
||||
@@ -147,11 +145,11 @@ namespace atidlas
|
||||
templates_[label]->enqueue("k" + tools::to_string(label), lazy_programs_, statements);
|
||||
}
|
||||
|
||||
void tune(statements_container const & statements)
|
||||
void tune(scheduler::statements_container const & statements)
|
||||
{
|
||||
//Collect the timings
|
||||
std::vector<float> timings(templates_.size());
|
||||
viennacl::tools::timer timer;
|
||||
tools::timer timer;
|
||||
for(size_t i = 0 ; i < templates_.size() ; ++i)
|
||||
{
|
||||
timer.start();
|
||||
|
424
atidlas/scheduler/forwards.h
Normal file
424
atidlas/scheduler/forwards.h
Normal file
@@ -0,0 +1,424 @@
|
||||
#ifndef ATIDLAS_SCHEDULER_STATEMENT_HPP
|
||||
#define ATIDLAS_SCHEDULER_STATEMENT_HPP
|
||||
|
||||
#include "atidlas/forwards.h"
|
||||
#include "atidlas/tools/predicate.hpp"
|
||||
#include "atidlas/tools/enable_if.hpp"
|
||||
|
||||
#include <list>
|
||||
#include <vector>
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
namespace scheduler
|
||||
{
|
||||
|
||||
/** @brief Optimization enum for grouping operations into unary or binary operations. Just for optimization of lookups. */
|
||||
enum operation_node_type_family
|
||||
{
|
||||
OPERATION_INVALID_TYPE_FAMILY = 0,
|
||||
|
||||
// unary or binary expression
|
||||
OPERATION_UNARY_TYPE_FAMILY,
|
||||
OPERATION_BINARY_TYPE_FAMILY,
|
||||
|
||||
//reductions
|
||||
OPERATION_VECTOR_REDUCTION_TYPE_FAMILY,
|
||||
OPERATION_ROWS_REDUCTION_TYPE_FAMILY,
|
||||
OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY
|
||||
};
|
||||
|
||||
/** @brief Enumeration for identifying the possible operations */
|
||||
enum operation_node_type
|
||||
{
|
||||
OPERATION_INVALID_TYPE = 0,
|
||||
|
||||
// unary operator
|
||||
OPERATION_UNARY_MINUS_TYPE,
|
||||
|
||||
// unary expression
|
||||
OPERATION_UNARY_CAST_CHAR_TYPE,
|
||||
OPERATION_UNARY_CAST_UCHAR_TYPE,
|
||||
OPERATION_UNARY_CAST_SHORT_TYPE,
|
||||
OPERATION_UNARY_CAST_USHORT_TYPE,
|
||||
OPERATION_UNARY_CAST_INT_TYPE,
|
||||
OPERATION_UNARY_CAST_UINT_TYPE,
|
||||
OPERATION_UNARY_CAST_LONG_TYPE,
|
||||
OPERATION_UNARY_CAST_ULONG_TYPE,
|
||||
OPERATION_UNARY_CAST_HALF_TYPE,
|
||||
OPERATION_UNARY_CAST_FLOAT_TYPE,
|
||||
OPERATION_UNARY_CAST_DOUBLE_TYPE,
|
||||
|
||||
OPERATION_UNARY_ABS_TYPE,
|
||||
OPERATION_UNARY_ACOS_TYPE,
|
||||
OPERATION_UNARY_ASIN_TYPE,
|
||||
OPERATION_UNARY_ATAN_TYPE,
|
||||
OPERATION_UNARY_CEIL_TYPE,
|
||||
OPERATION_UNARY_COS_TYPE,
|
||||
OPERATION_UNARY_COSH_TYPE,
|
||||
OPERATION_UNARY_EXP_TYPE,
|
||||
OPERATION_UNARY_FABS_TYPE,
|
||||
OPERATION_UNARY_FLOOR_TYPE,
|
||||
OPERATION_UNARY_LOG_TYPE,
|
||||
OPERATION_UNARY_LOG10_TYPE,
|
||||
OPERATION_UNARY_SIN_TYPE,
|
||||
OPERATION_UNARY_SINH_TYPE,
|
||||
OPERATION_UNARY_SQRT_TYPE,
|
||||
OPERATION_UNARY_TAN_TYPE,
|
||||
OPERATION_UNARY_TANH_TYPE,
|
||||
|
||||
OPERATION_UNARY_TRANS_TYPE,
|
||||
OPERATION_UNARY_NORM_1_TYPE,
|
||||
OPERATION_UNARY_NORM_2_TYPE,
|
||||
OPERATION_UNARY_NORM_INF_TYPE,
|
||||
OPERATION_UNARY_MAX_TYPE,
|
||||
OPERATION_UNARY_MIN_TYPE,
|
||||
|
||||
// binary expression
|
||||
OPERATION_BINARY_ACCESS_TYPE,
|
||||
OPERATION_BINARY_ASSIGN_TYPE,
|
||||
OPERATION_BINARY_INPLACE_ADD_TYPE,
|
||||
OPERATION_BINARY_INPLACE_SUB_TYPE,
|
||||
OPERATION_BINARY_ADD_TYPE,
|
||||
OPERATION_BINARY_SUB_TYPE,
|
||||
OPERATION_BINARY_MULT_TYPE,
|
||||
OPERATION_BINARY_DIV_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_ARGMAX_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_ARGMIN_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_PROD_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_DIV_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_EQ_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_NEQ_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_GREATER_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_GEQ_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_LESS_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_LEQ_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_POW_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_FMAX_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_FMIN_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_MAX_TYPE,
|
||||
OPERATION_BINARY_ELEMENT_MIN_TYPE,
|
||||
|
||||
OPERATION_BINARY_MATRIX_DIAG_TYPE,
|
||||
OPERATION_BINARY_VECTOR_DIAG_TYPE,
|
||||
OPERATION_BINARY_MATRIX_ROW_TYPE,
|
||||
OPERATION_BINARY_MATRIX_COLUMN_TYPE,
|
||||
OPERATION_BINARY_MAT_VEC_PROD_TYPE,
|
||||
OPERATION_BINARY_MAT_MAT_PROD_TYPE,
|
||||
OPERATION_BINARY_INNER_PROD_TYPE
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
namespace result_of
|
||||
{
|
||||
template<typename T>
|
||||
struct op_type_info
|
||||
{
|
||||
typedef typename T::ERROR_UNKNOWN_OP_TYPE error_type;
|
||||
};
|
||||
|
||||
// elementwise casts
|
||||
template<> struct op_type_info<op_element_cast<char> > { enum { id = OPERATION_UNARY_CAST_CHAR_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_cast<unsigned char> > { enum { id = OPERATION_UNARY_CAST_UCHAR_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_cast<short> > { enum { id = OPERATION_UNARY_CAST_SHORT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_cast<unsigned short> > { enum { id = OPERATION_UNARY_CAST_USHORT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_cast<int> > { enum { id = OPERATION_UNARY_CAST_INT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_cast<unsigned int> > { enum { id = OPERATION_UNARY_CAST_UINT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_cast<long> > { enum { id = OPERATION_UNARY_CAST_LONG_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_cast<unsigned long> > { enum { id = OPERATION_UNARY_CAST_ULONG_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_cast<float> > { enum { id = OPERATION_UNARY_CAST_FLOAT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_cast<double> > { enum { id = OPERATION_UNARY_CAST_DOUBLE_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
|
||||
// elementwise functions
|
||||
template<> struct op_type_info<op_element_unary<op_abs> > { enum { id = OPERATION_UNARY_ABS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_acos> > { enum { id = OPERATION_UNARY_ACOS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_asin> > { enum { id = OPERATION_UNARY_ASIN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_atan> > { enum { id = OPERATION_UNARY_ATAN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_ceil> > { enum { id = OPERATION_UNARY_CEIL_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_cos> > { enum { id = OPERATION_UNARY_COS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_cosh> > { enum { id = OPERATION_UNARY_COSH_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_exp> > { enum { id = OPERATION_UNARY_EXP_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_fabs> > { enum { id = OPERATION_UNARY_FABS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_floor> > { enum { id = OPERATION_UNARY_FLOOR_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_log> > { enum { id = OPERATION_UNARY_LOG_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_log10> > { enum { id = OPERATION_UNARY_LOG10_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_sin> > { enum { id = OPERATION_UNARY_SIN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_sinh> > { enum { id = OPERATION_UNARY_SINH_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_sqrt> > { enum { id = OPERATION_UNARY_SQRT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_tan> > { enum { id = OPERATION_UNARY_TAN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_unary<op_tanh> > { enum { id = OPERATION_UNARY_TANH_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
|
||||
template<> struct op_type_info<op_element_binary<op_argmax> > { enum { id = OPERATION_BINARY_ELEMENT_ARGMAX_TYPE , family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_binary<op_argmin> > { enum { id = OPERATION_BINARY_ELEMENT_ARGMIN_TYPE , family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_binary<op_pow> > { enum { id = OPERATION_BINARY_ELEMENT_POW_TYPE , family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_binary<op_eq> > { enum { id = OPERATION_BINARY_ELEMENT_EQ_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_binary<op_neq> > { enum { id = OPERATION_BINARY_ELEMENT_NEQ_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_binary<op_greater> > { enum { id = OPERATION_BINARY_ELEMENT_GREATER_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_binary<op_less> > { enum { id = OPERATION_BINARY_ELEMENT_LESS_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_binary<op_geq> > { enum { id = OPERATION_BINARY_ELEMENT_GEQ_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_binary<op_leq> > { enum { id = OPERATION_BINARY_ELEMENT_LEQ_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_binary<op_fmax> > { enum { id = OPERATION_BINARY_ELEMENT_FMAX_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_binary<op_fmin> > { enum { id = OPERATION_BINARY_ELEMENT_FMIN_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
|
||||
|
||||
//structurewise function
|
||||
template<> struct op_type_info<op_norm_1 > { enum { id = OPERATION_UNARY_NORM_1_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_norm_2 > { enum { id = OPERATION_UNARY_NORM_2_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_norm_inf > { enum { id = OPERATION_UNARY_NORM_INF_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_max > { enum { id = OPERATION_UNARY_MAX_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_min > { enum { id = OPERATION_UNARY_MIN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
|
||||
template<> struct op_type_info<op_trans > { enum { id = OPERATION_UNARY_TRANS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_row > { enum { id = OPERATION_BINARY_MATRIX_ROW_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_column > { enum { id = OPERATION_BINARY_MATRIX_COLUMN_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
|
||||
template<> struct op_type_info<op_matrix_diag> { enum { id = OPERATION_BINARY_MATRIX_DIAG_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_vector_diag> { enum { id = OPERATION_BINARY_VECTOR_DIAG_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
|
||||
template<> struct op_type_info<op_prod> { enum { id = OPERATION_BINARY_MAT_VEC_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_mat_mat_prod> { enum { id = OPERATION_BINARY_MAT_MAT_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_inner_prod> { enum { id = OPERATION_BINARY_INNER_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
|
||||
template<typename OP> struct op_type_info<op_reduce_vector<OP> > { enum { id = op_type_info<OP>::id, family = OPERATION_VECTOR_REDUCTION_TYPE_FAMILY}; };
|
||||
template<typename OP> struct op_type_info<op_reduce_rows<OP> > { enum { id = op_type_info<OP>::id, family = OPERATION_ROWS_REDUCTION_TYPE_FAMILY}; };
|
||||
template<typename OP> struct op_type_info<op_reduce_columns<OP> > { enum { id = op_type_info<OP>::id, family = OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY}; };
|
||||
|
||||
//elementwise operator
|
||||
template<> struct op_type_info<op_assign> { enum { id = OPERATION_BINARY_ASSIGN_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_inplace_add> { enum { id = OPERATION_BINARY_INPLACE_ADD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_inplace_sub> { enum { id = OPERATION_BINARY_INPLACE_SUB_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_add> { enum { id = OPERATION_BINARY_ADD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_sub> { enum { id = OPERATION_BINARY_SUB_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_binary<op_prod> > { enum { id = OPERATION_BINARY_ELEMENT_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_element_binary<op_div> > { enum { id = OPERATION_BINARY_ELEMENT_DIV_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_mult> { enum { id = OPERATION_BINARY_MULT_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
template<> struct op_type_info<op_div> { enum { id = OPERATION_BINARY_DIV_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; };
|
||||
|
||||
template<> struct op_type_info<op_flip_sign> { enum { id = OPERATION_UNARY_MINUS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; };
|
||||
|
||||
|
||||
/** \endcond */
|
||||
} // namespace result_of
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/** @brief Groups the type of a node in the statement tree. Used for faster dispatching */
|
||||
enum statement_node_type_family
|
||||
{
|
||||
INVALID_TYPE_FAMILY = 0,
|
||||
// LHS or RHS are again an expression:
|
||||
COMPOSITE_OPERATION_FAMILY,
|
||||
// device scalars:
|
||||
SCALAR_TYPE_FAMILY,
|
||||
// vector:
|
||||
VECTOR_TYPE_FAMILY,
|
||||
// matrices:
|
||||
MATRIX_TYPE_FAMILY
|
||||
};
|
||||
|
||||
/** @brief Encodes the type of a node in the statement tree. */
|
||||
enum statement_node_subtype
|
||||
{
|
||||
INVALID_SUBTYPE = 0,
|
||||
|
||||
HOST_SCALAR_TYPE,
|
||||
DEVICE_SCALAR_TYPE,
|
||||
|
||||
DENSE_VECTOR_TYPE,
|
||||
IMPLICIT_VECTOR_TYPE,
|
||||
|
||||
DENSE_MATRIX_TYPE,
|
||||
IMPLICIT_MATRIX_TYPE,
|
||||
};
|
||||
|
||||
/** @brief A class representing the 'data' for the LHS or RHS operand of the respective node.
|
||||
*
|
||||
* If it represents a compound expression, the union holds the array index within the respective statement array.
|
||||
* If it represents a object with data (vector, matrix, etc.) it holds the respective pointer (scalar, vector, matrix) or value (host scalar)
|
||||
*
|
||||
* The member 'type_family' is an optimization for quickly retrieving the 'type', which denotes the currently 'active' member in the union
|
||||
*/
|
||||
struct lhs_rhs_element
|
||||
{
|
||||
statement_node_type_family type_family;
|
||||
statement_node_subtype subtype;
|
||||
numeric_type numeric_t;
|
||||
|
||||
union
|
||||
{
|
||||
unsigned int node_index;
|
||||
atidlas::vector_base * vector;
|
||||
atidlas::matrix_base * matrix;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** @brief Struct for holding the type family as well as the type of an operation (could be addition, subtraction, norm, etc.) */
|
||||
struct op_element
|
||||
{
|
||||
operation_node_type_family type_family;
|
||||
operation_node_type type;
|
||||
};
|
||||
|
||||
/** @brief Main datastructure for an node in the statement tree */
|
||||
struct statement_node
|
||||
{
|
||||
lhs_rhs_element lhs;
|
||||
op_element op;
|
||||
lhs_rhs_element rhs;
|
||||
};
|
||||
|
||||
namespace result_of
|
||||
{
|
||||
|
||||
template<class T> struct num_nodes { enum { value = 0 }; };
|
||||
template<class LHS, class OP, class RHS> struct num_nodes< vector_expression<LHS, RHS, OP> > { enum { value = 1 + num_nodes<LHS>::value + num_nodes<RHS>::value + num_nodes<OP>::value }; };
|
||||
template<class LHS, class OP, class RHS> struct num_nodes< const vector_expression<LHS, RHS, OP> > { enum { value = 1 + num_nodes<LHS>::value + num_nodes<RHS>::value + num_nodes<OP>::value }; };
|
||||
template<class LHS, class OP, class RHS> struct num_nodes< matrix_expression<LHS, RHS, OP> > { enum { value = 1 + num_nodes<LHS>::value + num_nodes<RHS>::value + num_nodes<OP>::value }; };
|
||||
template<class LHS, class OP, class RHS> struct num_nodes< const matrix_expression<LHS, RHS, OP> > { enum { value = 1 + num_nodes<LHS>::value + num_nodes<RHS>::value + num_nodes<OP>::value }; };
|
||||
template<class LHS, class OP, class RHS> struct num_nodes< scalar_expression<LHS, RHS, OP> > { enum { value = 1 + num_nodes<LHS>::value + num_nodes<RHS>::value + num_nodes<OP>::value }; };
|
||||
template<class LHS, class OP, class RHS> struct num_nodes< const scalar_expression<LHS, RHS, OP> > { enum { value = 1 + num_nodes<LHS>::value + num_nodes<RHS>::value + num_nodes<OP>::value }; };
|
||||
|
||||
}
|
||||
|
||||
/** \brief The main class for representing a statement such as x = inner_prod(y,z); at runtime.
|
||||
*
|
||||
* This is the equivalent to an expression template tree, but entirely built at runtime in order to perform really cool stuff such as kernel fusion.
|
||||
*/
|
||||
class statement
|
||||
{
|
||||
public:
|
||||
typedef statement_node value_type;
|
||||
typedef std::vector<value_type> container_type;
|
||||
|
||||
statement(container_type const & custom_array) : array_(custom_array) {}
|
||||
|
||||
/** @brief Generate the runtime statement from an expression template.
|
||||
*
|
||||
* Constructing a runtime statement from expression templates makes perfect sense, because this way only a single allocation is needed when creating the statement. */
|
||||
template<typename LHS, typename OP, typename RHS>
|
||||
statement(LHS & lhs, OP const &, RHS const & rhs) : array_(1 + result_of::num_nodes<RHS>::value)
|
||||
{
|
||||
array_[0].op.type_family = operation_node_type_family(result_of::op_type_info<OP>::family);
|
||||
array_[0].op.type = operation_node_type(result_of::op_type_info<OP>::id);
|
||||
add_lhs(0, 1, lhs);
|
||||
add_rhs(0, 1, rhs);
|
||||
}
|
||||
|
||||
container_type const & array() const { return array_; }
|
||||
unsigned int root() const { return 0; }
|
||||
private:
|
||||
|
||||
//////////// Tree nodes (non-terminals) ////////////////////
|
||||
|
||||
unsigned int add_element(unsigned int next_free, lhs_rhs_element & elem, vector_base const & x)
|
||||
{
|
||||
elem.type_family = VECTOR_TYPE_FAMILY;
|
||||
elem.subtype = DENSE_VECTOR_TYPE;
|
||||
elem.vector = const_cast<vector_base*>(&x);
|
||||
return next_free;
|
||||
}
|
||||
|
||||
template<typename LHS, typename RHS, typename OP>
|
||||
unsigned int add_element(unsigned int next_free,
|
||||
lhs_rhs_element & elem,
|
||||
scalar_expression<LHS, RHS, OP> const & t)
|
||||
{
|
||||
elem.type_family = COMPOSITE_OPERATION_FAMILY;
|
||||
elem.subtype = INVALID_SUBTYPE;
|
||||
elem.node_index = next_free;
|
||||
return add_node(next_free, next_free + 1, t);
|
||||
}
|
||||
|
||||
template<typename LHS, typename RHS, typename OP>
|
||||
unsigned int add_element(unsigned int next_free,
|
||||
lhs_rhs_element & elem,
|
||||
vector_expression<LHS, RHS, OP> const & t)
|
||||
{
|
||||
elem.type_family = COMPOSITE_OPERATION_FAMILY;
|
||||
elem.subtype = INVALID_SUBTYPE;
|
||||
elem.node_index = next_free;
|
||||
return add_node(next_free, next_free + 1, t);
|
||||
}
|
||||
|
||||
template<typename LHS, typename RHS, typename OP>
|
||||
unsigned int add_element(unsigned int next_free,
|
||||
lhs_rhs_element & elem,
|
||||
matrix_expression<LHS, RHS, OP> const & t)
|
||||
{
|
||||
elem.type_family = COMPOSITE_OPERATION_FAMILY;
|
||||
elem.subtype = INVALID_SUBTYPE;
|
||||
elem.numeric_t = INVALID_NUMERIC_TYPE;
|
||||
elem.node_index = next_free;
|
||||
return add_node(next_free, next_free + 1, t);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
unsigned int add_lhs(unsigned int current_index, unsigned int next_free, T const & t)
|
||||
{ return add_element(next_free, array_[current_index].lhs, t); }
|
||||
|
||||
template<typename T>
|
||||
unsigned int add_rhs(unsigned int current_index, unsigned int next_free, T const & t)
|
||||
{ return add_element(next_free, array_[current_index].rhs, t); }
|
||||
|
||||
template<template<typename, typename, typename> class ExpressionT, typename LHS, typename RHS, typename OP>
|
||||
unsigned int add_node(unsigned int current_index, unsigned int next_free, ExpressionT<LHS, RHS, OP> const & proxy)
|
||||
{
|
||||
// set OP:
|
||||
array_[current_index].op.type_family = operation_node_type_family(result_of::op_type_info<OP>::family);
|
||||
array_[current_index].op.type = operation_node_type(result_of::op_type_info<OP>::id);
|
||||
|
||||
// set LHS and RHS:
|
||||
if (array_[current_index].op.type_family == OPERATION_UNARY_TYPE_FAMILY)
|
||||
{
|
||||
// unary expression: set rhs to invalid:
|
||||
array_[current_index].rhs.type_family = INVALID_TYPE_FAMILY;
|
||||
array_[current_index].rhs.subtype = INVALID_SUBTYPE;
|
||||
array_[current_index].rhs.numeric_t = INVALID_NUMERIC_TYPE;
|
||||
return add_lhs(current_index, next_free, proxy.lhs());
|
||||
}
|
||||
|
||||
return add_rhs(current_index, add_lhs(current_index, next_free, proxy.lhs()), proxy.rhs());
|
||||
}
|
||||
|
||||
container_type array_;
|
||||
};
|
||||
|
||||
class statements_container
|
||||
{
|
||||
public:
|
||||
typedef std::list<scheduler::statement> data_type;
|
||||
enum order_type { SEQUENTIAL, INDEPENDENT };
|
||||
|
||||
statements_container(data_type const & data, order_type order) : data_(data), order_(order)
|
||||
{ }
|
||||
|
||||
statements_container(scheduler::statement const & s0) : order_(INDEPENDENT)
|
||||
{
|
||||
data_.push_back(s0);
|
||||
}
|
||||
|
||||
statements_container(scheduler::statement const & s0, scheduler::statement const & s1, order_type order) : order_(order)
|
||||
{
|
||||
data_.push_back(s0);
|
||||
data_.push_back(s1);
|
||||
}
|
||||
|
||||
std::list<scheduler::statement> const & data() const { return data_; }
|
||||
order_type order() const { return order_; }
|
||||
private:
|
||||
std::list<scheduler::statement> data_;
|
||||
order_type order_;
|
||||
};
|
||||
|
||||
} // namespace scheduler
|
||||
} // namespace viennacl
|
||||
|
||||
#endif
|
||||
|
43
atidlas/tools/enable_if.hpp
Normal file
43
atidlas/tools/enable_if.hpp
Normal file
@@ -0,0 +1,43 @@
|
||||
#ifndef VIENNACL_META_ENABLE_IF_HPP_
|
||||
#define VIENNACL_META_ENABLE_IF_HPP_
|
||||
|
||||
/* =========================================================================
|
||||
Copyright (c) 2010-2014, Institute for Microelectronics,
|
||||
Institute for Analysis and Scientific Computing,
|
||||
TU Wien.
|
||||
Portions of this software are copyright by UChicago Argonne, LLC.
|
||||
|
||||
-----------------
|
||||
ViennaCL - The Vienna Computing Library
|
||||
-----------------
|
||||
|
||||
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
|
||||
|
||||
(A list of authors and contributors can be found in the PDF manual)
|
||||
|
||||
License: MIT (X11), see file LICENSE in the base directory
|
||||
============================================================================= */
|
||||
|
||||
/** @file viennacl/meta/enable_if.hpp
|
||||
@brief Simple enable-if variant that uses the SFINAE pattern
|
||||
*/
|
||||
|
||||
namespace viennacl
|
||||
{
|
||||
|
||||
/** @brief Simple enable-if variant that uses the SFINAE pattern */
|
||||
template<bool b, class T = void>
|
||||
struct enable_if
|
||||
{
|
||||
typedef T type;
|
||||
};
|
||||
|
||||
/** \cond */
|
||||
template<class T>
|
||||
struct enable_if<false, T> {};
|
||||
/** \endcond */
|
||||
|
||||
} //namespace viennacl
|
||||
|
||||
|
||||
#endif
|
@@ -1,77 +0,0 @@
|
||||
#ifndef ATIDLAS_EXECUTION_HANDLER_HPP
|
||||
#define ATIDLAS_EXECUTION_HANDLER_HPP
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "viennacl/tools/shared_ptr.hpp"
|
||||
|
||||
#include "atidlas/tools/lazy_program_compiler.hpp"
|
||||
#include "atidlas/backend/templates/template_base.hpp"
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
class execution_handler
|
||||
{
|
||||
public:
|
||||
typedef std::map< std::string, tools::shared_ptr<template_base> > container_type;
|
||||
|
||||
private:
|
||||
std::string append_prefix(std::string const & str)
|
||||
{
|
||||
return "_" + str;
|
||||
}
|
||||
|
||||
std::string define_extension(std::string const & ext)
|
||||
{
|
||||
// Note: On devices without double precision support, 'ext' is an empty string.
|
||||
return (ext.length() > 1) ? std::string("#pragma OPENCL EXTENSION " + ext + " : enable\n") : std::string("\n");
|
||||
}
|
||||
|
||||
void init_program_compiler(std::string const & name, bool force_recompilation)
|
||||
{
|
||||
lazy_programs_.push_back(lazy_program_compiler(&ctx_, name, force_recompilation));
|
||||
lazy_programs_.back().add(define_extension(device_.double_support_extension()));
|
||||
}
|
||||
|
||||
public:
|
||||
execution_handler(std::string const & program_name_base, viennacl::ocl::context & ctx, viennacl::ocl::device const & device, bool force_recompilation = false) : ctx_(ctx), device_(device), program_names_(2), init_done_(false)
|
||||
{
|
||||
lazy_programs_.reserve(2);
|
||||
init_program_compiler(program_name_base + "_0", force_recompilation);
|
||||
init_program_compiler(program_name_base + "_1", force_recompilation);
|
||||
}
|
||||
|
||||
void add(std::string const & key, template_base const & T, statements_container const & statements)
|
||||
{
|
||||
if (kernels_.insert(container_type::value_type(key, T.clone())).second)
|
||||
{
|
||||
std::vector<std::string> sources = kernels_.at(key)->generate(append_prefix(key), statements, device_);
|
||||
assert(sources.size()<=2);
|
||||
for (unsigned int i = 0; i < sources.size(); ++i)
|
||||
lazy_programs_[i].add(sources[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template_base * template_of(std::string const & key)
|
||||
{
|
||||
return kernels_.at(key).get();
|
||||
}
|
||||
|
||||
void execute(container_type::key_type const & key, statements_container const & statements)
|
||||
{
|
||||
tools::shared_ptr<template_base> & template_pointer = kernels_.at(key);
|
||||
template_pointer->enqueue(append_prefix(key), lazy_programs_, statements);
|
||||
}
|
||||
|
||||
private:
|
||||
viennacl::ocl::context & ctx_;
|
||||
viennacl::ocl::device const & device_;
|
||||
container_type kernels_;
|
||||
std::vector<std::string> program_names_;
|
||||
std::vector<lazy_program_compiler> lazy_programs_;
|
||||
bool init_done_;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@@ -2,7 +2,7 @@
|
||||
#define ATIDLAS_LAZY_PROGRAM_COMPILER_HPP
|
||||
|
||||
#include <map>
|
||||
#include "viennacl/ocl/context.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
@@ -11,29 +11,29 @@ namespace atidlas
|
||||
{
|
||||
public:
|
||||
|
||||
lazy_program_compiler(viennacl::ocl::context * ctx, std::string const & name, std::string const & src, bool force_recompilation) : ctx_(ctx), program_(NULL), name_(name), src_(src), force_recompilation_(force_recompilation){ }
|
||||
lazy_program_compiler(viennacl::ocl::context * ctx, std::string const & name, bool force_recompilation) : ctx_(ctx), program_(NULL), name_(name), force_recompilation_(force_recompilation){ }
|
||||
lazy_program_compiler(cl::Context * ctx, std::string const & name, std::string const & src, bool force_recompilation) : ctx_(ctx), program_(NULL), name_(name), src_(src), force_recompilation_(force_recompilation){ }
|
||||
lazy_program_compiler(cl::Context * ctx, std::string const & name, bool force_recompilation) : ctx_(ctx), program_(NULL), name_(name), force_recompilation_(force_recompilation){ }
|
||||
|
||||
void add(std::string const & src) { src_+=src; }
|
||||
|
||||
std::string const & src() const { return src_; }
|
||||
|
||||
viennacl::ocl::program & program()
|
||||
cl::Program & program()
|
||||
{
|
||||
if(program_==NULL)
|
||||
{
|
||||
if (force_recompilation_ && ctx_->has_program(name_))
|
||||
ctx_->delete_program(name_);
|
||||
if (!ctx_->has_program(name_))
|
||||
ctx_->add_program(src_, name_);
|
||||
program_ = &ctx_->get_program(name_);
|
||||
}
|
||||
return *program_;
|
||||
// if(program_==NULL)
|
||||
// {
|
||||
// if (force_recompilation_ && ctx_->has_program(name_))
|
||||
// ctx_->delete_program(name_);
|
||||
// if (!ctx_->has_program(name_))
|
||||
// ctx_->add_program(src_, name_);
|
||||
// program_ = &ctx_->get_program(name_);
|
||||
// }
|
||||
// return *program_;
|
||||
}
|
||||
|
||||
private:
|
||||
viennacl::ocl::context * ctx_;
|
||||
viennacl::ocl::program * program_;
|
||||
cl::Context * ctx_;
|
||||
cl::Program * program_;
|
||||
std::string name_;
|
||||
std::string src_;
|
||||
bool force_recompilation_;
|
||||
|
47
atidlas/tools/predicate.hpp
Normal file
47
atidlas/tools/predicate.hpp
Normal file
@@ -0,0 +1,47 @@
|
||||
#ifndef ATIDLAS_PREDICATE_HPP_
|
||||
#define ATIDLAS_PREDICATE_HPP_
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
/** @brief Helper class for checking whether a type is a primitive type. */
|
||||
template<class T>
|
||||
struct is_primitive_type{ enum {value = false}; };
|
||||
|
||||
template<> struct is_primitive_type<float> { enum { value = true }; };
|
||||
template<> struct is_primitive_type<double> { enum { value = true }; };
|
||||
template<> struct is_primitive_type<unsigned int> { enum { value = true }; };
|
||||
template<> struct is_primitive_type<int> { enum { value = true }; };
|
||||
template<> struct is_primitive_type<unsigned char> { enum { value = true }; };
|
||||
template<> struct is_primitive_type<char> { enum { value = true }; };
|
||||
template<> struct is_primitive_type<unsigned long> { enum { value = true }; };
|
||||
template<> struct is_primitive_type<long> { enum { value = true }; };
|
||||
template<> struct is_primitive_type<unsigned short>{ enum { value = true }; };
|
||||
template<> struct is_primitive_type<short> { enum { value = true }; };
|
||||
|
||||
|
||||
/** @brief Helper class for checking whether a particular type is a native OpenCL type. */
|
||||
template<class T>
|
||||
struct is_cl_type{ enum { value = false }; };
|
||||
|
||||
template<> struct is_cl_type<cl_float> { enum { value = true }; };
|
||||
template<> struct is_cl_type<cl_double>{ enum { value = true }; };
|
||||
template<> struct is_cl_type<cl_uint> { enum { value = true }; };
|
||||
template<> struct is_cl_type<cl_int> { enum { value = true }; };
|
||||
template<> struct is_cl_type<cl_uchar> { enum { value = true }; };
|
||||
template<> struct is_cl_type<cl_char> { enum { value = true }; };
|
||||
template<> struct is_cl_type<cl_ulong> { enum { value = true }; };
|
||||
template<> struct is_cl_type<cl_long> { enum { value = true }; };
|
||||
template<> struct is_cl_type<cl_ushort>{ enum { value = true }; };
|
||||
template<> struct is_cl_type<cl_short> { enum { value = true }; };
|
||||
|
||||
/** @brief Helper class for checking whether a particular type is a floating point type. */
|
||||
template<class T> struct is_floating_point { enum { value = false }; };
|
||||
template<> struct is_floating_point<float> { enum { value = true }; };
|
||||
template<> struct is_floating_point<double> { enum { value = true }; };
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif
|
123
atidlas/tools/timer.hpp
Normal file
123
atidlas/tools/timer.hpp
Normal file
@@ -0,0 +1,123 @@
|
||||
#ifndef _VIENNACL_TOOLS_TIMER_HPP_
|
||||
#define _VIENNACL_TOOLS_TIMER_HPP_
|
||||
|
||||
/* =========================================================================
|
||||
Copyright (c) 2010-2014, Institute for Microelectronics,
|
||||
Institute for Analysis and Scientific Computing,
|
||||
TU Wien.
|
||||
Portions of this software are copyright by UChicago Argonne, LLC.
|
||||
|
||||
-----------------
|
||||
ViennaCL - The Vienna Computing Library
|
||||
-----------------
|
||||
|
||||
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
|
||||
|
||||
(A list of authors and contributors can be found in the PDF manual)
|
||||
|
||||
License: MIT (X11), see file LICENSE in the base directory
|
||||
============================================================================= */
|
||||
|
||||
|
||||
/** @file viennacl/tools/timer.hpp
|
||||
@brief A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling. */
|
||||
|
||||
#include <iostream>
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#define WINDOWS_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#undef min
|
||||
#undef max
|
||||
|
||||
namespace viennacl
|
||||
{
|
||||
namespace tools
|
||||
{
|
||||
|
||||
/** @brief Simple timer class based on gettimeofday (POSIX) or QueryPerformanceCounter (Windows).
|
||||
*
|
||||
* Avoids messing with Boost and should be sufficient for benchmarking purposes.
|
||||
*/
|
||||
class timer
|
||||
{
|
||||
public:
|
||||
|
||||
timer()
|
||||
{
|
||||
QueryPerformanceFrequency(&freq);
|
||||
}
|
||||
|
||||
void start()
|
||||
{
|
||||
QueryPerformanceCounter((LARGE_INTEGER*) &start_time);
|
||||
}
|
||||
|
||||
double get() const
|
||||
{
|
||||
LARGE_INTEGER elapsed;
|
||||
QueryPerformanceCounter((LARGE_INTEGER*) &end_time);
|
||||
elapsed.QuadPart = end_time.QuadPart - start_time.QuadPart;
|
||||
return elapsed.QuadPart / static_cast<double>(freq.QuadPart);
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
LARGE_INTEGER freq;
|
||||
LARGE_INTEGER start_time;
|
||||
LARGE_INTEGER end_time;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
namespace viennacl
|
||||
{
|
||||
namespace tools
|
||||
{
|
||||
|
||||
/** @brief Simple timer class based on gettimeofday (POSIX) or QueryPerformanceCounter (Windows).
|
||||
*
|
||||
* Avoids messing with Boost and should be sufficient for benchmarking purposes.
|
||||
*/
|
||||
class timer
|
||||
{
|
||||
public:
|
||||
|
||||
timer() : ts(0)
|
||||
{}
|
||||
|
||||
void start()
|
||||
{
|
||||
struct timeval tval;
|
||||
gettimeofday(&tval, NULL);
|
||||
ts = static_cast<double>(tval.tv_sec * 1000000 + tval.tv_usec);
|
||||
}
|
||||
|
||||
double get() const
|
||||
{
|
||||
struct timeval tval;
|
||||
gettimeofday(&tval, NULL);
|
||||
double end_time = static_cast<double>(tval.tv_sec * 1000000 + tval.tv_usec);
|
||||
|
||||
return static_cast<double>(end_time-ts) / 1000000.0;
|
||||
}
|
||||
|
||||
private:
|
||||
double ts;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
#endif
|
43
atidlas/traits/size.hpp
Normal file
43
atidlas/traits/size.hpp
Normal file
@@ -0,0 +1,43 @@
|
||||
#ifndef ATIDLAS_TRAITS_SIZE_HPP_
|
||||
#define ATIDLAS_TRAITS_SIZE_HPP_
|
||||
|
||||
#include "atidlas/forwards.h"
|
||||
#include "atidlas/tools/predicate.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
namespace traits
|
||||
{
|
||||
|
||||
template<typename LHS>
|
||||
atidlas_int_t size(vector_expression<LHS, const int, op_matrix_diag> const & proxy)
|
||||
{
|
||||
int k = proxy.rhs();
|
||||
int A_size1 = static_cast<int>(size1(proxy.lhs()));
|
||||
int A_size2 = static_cast<int>(size2(proxy.lhs()));
|
||||
|
||||
int row_depth = std::min(A_size1, A_size1 + k);
|
||||
int col_depth = std::min(A_size2, A_size2 - k);
|
||||
|
||||
return atidlas_int_t(std::min(row_depth, col_depth));
|
||||
}
|
||||
|
||||
template<typename LHS>
|
||||
atidlas_int_t size(vector_expression<LHS, const unsigned int, op_row> const & proxy)
|
||||
{ return size2(proxy.lhs());}
|
||||
|
||||
template<typename LHS>
|
||||
atidlas_int_t size(vector_expression<LHS, const unsigned int, op_column> const & proxy)
|
||||
{ return size1(proxy.lhs());}
|
||||
|
||||
inline atidlas_int_t size(vector_base const & x)
|
||||
{ return x.size(); }
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
74
atidlas/vector.hpp
Normal file
74
atidlas/vector.hpp
Normal file
@@ -0,0 +1,74 @@
|
||||
#ifndef ATIDLAS_VECTOR_H
|
||||
#define ATIDLAS_VECTOR_H
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include "atidlas/forwards.h"
|
||||
#include "atidlas/scheduler/forwards.h"
|
||||
#include "atidlas/expression_template.hpp"
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
class vector : public vector_base
|
||||
{
|
||||
public:
|
||||
vector(atidlas_int_t size, numeric_type dtype, cl::Context context) : vector_base(size, dtype, context){}
|
||||
|
||||
template<typename T>
|
||||
vector & operator=(T const & other)
|
||||
{
|
||||
vector_base::operator=(other);
|
||||
return *this;
|
||||
}
|
||||
// using vector_base::operator+=;
|
||||
// using vector_base::operator-=;
|
||||
};
|
||||
|
||||
#define ATIDLAS_ADD_BINARY_OPERATOR(TYPE, OP) \
|
||||
template<typename XL, typename XR, typename XOP, \
|
||||
typename YL, typename YR, typename YOP> \
|
||||
TYPE ## _expression< const TYPE ## _expression< XL, XR, XOP>, const TYPE ## _expression< YL, YR, YOP>, OP> \
|
||||
operator + (TYPE ## _expression<XL, XR, XOP> const & x, TYPE ## _expression<YL, YR, YOP> const & y) \
|
||||
{ \
|
||||
assert(x.size() == y.size() && bool("Incompatible TYPE sizes!")); \
|
||||
return TYPE ## _expression< const TYPE ## _expression<XL, XR, XOP>, const TYPE ## _expression<YL, YR, YOP>, OP>(x, y); \
|
||||
} \
|
||||
\
|
||||
template<typename XL, typename XR, typename XOP> \
|
||||
TYPE ## _expression< const TYPE ## _expression< XL, XR, XOP>, const TYPE ## _base, OP> \
|
||||
operator + (TYPE ## _expression<XL, XR, XOP> const & x, TYPE ## _base const & y) \
|
||||
{ \
|
||||
assert(x.size() == y.size() && bool("Incompatible TYPE sizes!")); \
|
||||
return TYPE ## _expression< const TYPE ## _expression<XL, XR, XOP>, const TYPE ## _base, OP>(x, y); \
|
||||
} \
|
||||
\
|
||||
template<typename T, typename YL, typename YR, typename YOP> \
|
||||
TYPE ## _expression< const TYPE ## _expression< YL, YR, YOP>, const TYPE ## _base, OP> \
|
||||
operator + (TYPE ## _base const & x, TYPE ## _expression<YL, YR, YOP> const & y) \
|
||||
{ \
|
||||
assert(x.size() == y.size() && bool("Incompatible TYPE sizes!")); \
|
||||
return TYPE ## _expression<const TYPE ## _base, const TYPE ## _expression<YL, YR, YOP>, OP>(x, y); \
|
||||
} \
|
||||
\
|
||||
TYPE ## _expression< const TYPE ## _base, const TYPE ## _base, OP> \
|
||||
operator + (TYPE ## _base const & x, TYPE ## _base const & y) \
|
||||
{ \
|
||||
assert(x.size() == y.size() && bool("Incompatible TYPE sizes!")); \
|
||||
return TYPE ## _expression<const TYPE ## _base, const TYPE ## _base, OP>(x, y); \
|
||||
}
|
||||
|
||||
ATIDLAS_ADD_BINARY_OPERATOR(vector, op_add)
|
||||
|
||||
#undef ATIDLAS_ADD_BINARY_OPERATOR
|
||||
|
||||
template<class LHS, class RHS, class OP>
|
||||
vector_base & vector_base::operator=(vector_expression<LHS, RHS, OP> const & operation)
|
||||
{
|
||||
scheduler::statement s(*this, op_assign(), operation);
|
||||
return *this;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user