diff --git a/atidlas/backend/forwards.h b/atidlas/backend/forwards.h index b2773e7af..c18638dcb 100644 --- a/atidlas/backend/forwards.h +++ b/atidlas/backend/forwards.h @@ -7,13 +7,7 @@ #include #include "atidlas/tools/shared_ptr.hpp" - -#include "viennacl/scheduler/io.hpp" -#include "viennacl/ocl/forwards.h" -#include "viennacl/scheduler/forwards.h" -#include "viennacl/backend/mem_handle.hpp" -#include "viennacl/device_specific/forwards.h" - +#include "atidlas/scheduler/forwards.h" namespace atidlas { @@ -50,22 +44,22 @@ struct atidlas_int_tuple std::string bound1; }; -inline bool is_scalar_reduction(viennacl::scheduler::statement_node const & node) +inline bool is_scalar_reduction(scheduler::statement_node const & node) { - return node.op.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE || node.op.type_family==viennacl::scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY; + return node.op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE || node.op.type_family==scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY; } -inline bool is_vector_reduction(viennacl::scheduler::statement_node const & node) +inline bool is_vector_reduction(scheduler::statement_node const & node) { - return node.op.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE - || node.op.type_family==viennacl::scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY - || node.op.type_family==viennacl::scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY; + return node.op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE + || node.op.type_family==scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY + || node.op.type_family==scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY; } -inline viennacl::scheduler::statement_node const & lhs_most(viennacl::scheduler::statement::container_type const & array, size_t root) +inline scheduler::statement_node const & lhs_most(scheduler::statement::container_type const & array, size_t root) { - viennacl::scheduler::statement_node const * current = &array[root]; - while (current->lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) + scheduler::statement_node const * current = &array[root]; + while (current->lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) current = &array[current->lhs.node_index]; return *current; } @@ -162,38 +156,27 @@ namespace tools { template - inline void traverse(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, Fun const & fun, bool inspect); + inline void traverse(scheduler::statement const & statement, atidlas_int_t root_idx, Fun const & fun, bool inspect); inline void process(tools::kernel_generation_stream & stream, leaf_t leaf, std::multimap const & accessors, - viennacl::scheduler::statement const & statement, size_t root_idx, mapping_type const & mapping, std::set & already_processed); - inline std::string evaluate(leaf_t leaf, std::map const & accessors, viennacl::scheduler::statement const & statement, atidlas_int_t root_idx,mapping_type const & mapping); + scheduler::statement const & statement, size_t root_idx, mapping_type const & mapping, std::set & already_processed); + inline std::string evaluate(leaf_t leaf, std::map const & accessors, scheduler::statement const & statement, atidlas_int_t root_idx,mapping_type const & mapping); } -using viennacl::scheduler::INT_TYPE; -using viennacl::scheduler::UINT_TYPE; -using viennacl::scheduler::ULONG_TYPE; -using viennacl::scheduler::LONG_TYPE; -using viennacl::scheduler::FLOAT_TYPE; -using viennacl::scheduler::DOUBLE_TYPE; - -typedef cl_uint vendor_id_type; -typedef cl_device_type device_type; -typedef std::string device_name_type; - class symbolic_binder { public: virtual ~symbolic_binder(){ } - virtual bool bind(viennacl::backend::mem_handle const * ph) = 0; - virtual unsigned int get(viennacl::backend::mem_handle const * ph) = 0; + virtual bool bind(cl::Buffer const * ph) = 0; + virtual unsigned int get(cl::Buffer const * ph) = 0; }; class bind_to_handle : public symbolic_binder { public: bind_to_handle() : current_arg_(0){ } - bool bind(viennacl::backend::mem_handle const * ph) {return (ph==NULL)?true:memory.insert(std::make_pair((void*)ph, current_arg_)).second; } - unsigned int get(viennacl::backend::mem_handle const * ph){ return bind(ph)?current_arg_++:memory.at((void*)ph); } + bool bind(cl::Buffer const * ph) {return (ph==NULL)?true:memory.insert(std::make_pair((void*)ph, current_arg_)).second; } + unsigned int get(cl::Buffer const * ph){ return bind(ph)?current_arg_++:memory.at((void*)ph); } private: unsigned int current_arg_; std::map memory; @@ -203,8 +186,8 @@ class bind_all_unique : public symbolic_binder { public: bind_all_unique() : current_arg_(0){ } - bool bind(viennacl::backend::mem_handle const *) {return true; } - unsigned int get(viennacl::backend::mem_handle const *){ return current_arg_++; } + bool bind(cl::Buffer const *) {return true; } + unsigned int get(cl::Buffer const *){ return current_arg_++; } private: unsigned int current_arg_; std::map memory; @@ -226,8 +209,6 @@ inline tools::shared_ptr make_binder(binding_policy_t policy) template struct char_to_type{ }; -typedef viennacl::device_specific::statements_container statements_container; - } #endif diff --git a/atidlas/backend/mapped_objects.hpp b/atidlas/backend/mapped_objects.hpp index b62b36153..7b376c724 100644 --- a/atidlas/backend/mapped_objects.hpp +++ b/atidlas/backend/mapped_objects.hpp @@ -3,9 +3,9 @@ #include -#include "viennacl/scheduler/forwards.h" #include "atidlas/forwards.h" +#include "atidlas/scheduler/forwards.h" #include "atidlas/tools/find_and_replace.hpp" #include "atidlas/backend/tools/misc.hpp" @@ -63,10 +63,10 @@ protected: public: struct node_info { - node_info(mapping_type const * _mapping, viennacl::scheduler::statement const * _statement, atidlas_int_t _root_idx) : + node_info(mapping_type const * _mapping, scheduler::statement const * _statement, atidlas_int_t _root_idx) : mapping(_mapping), statement(_statement), root_idx(_root_idx) { } mapping_type const * mapping; - viennacl::scheduler::statement const * statement; + scheduler::statement const * statement; atidlas_int_t root_idx; }; @@ -153,16 +153,16 @@ public: mapped_reduction(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key) : mapped_object(scalartype, id, type_key), binary_leaf(info){ } atidlas_int_t root_idx() const { return info_.root_idx; } - viennacl::scheduler::statement const & statement() const { return *info_.statement; } - viennacl::scheduler::statement_node root_node() const { return statement().array()[root_idx()]; } + scheduler::statement const & statement() const { return *info_.statement; } + scheduler::statement_node root_node() const { return statement().array()[root_idx()]; } bool is_index_reduction() const { return tools::is_index_reduction(info_.statement->array()[info_.root_idx].op); } - viennacl::scheduler::op_element root_op() const + scheduler::op_element root_op() const { - viennacl::scheduler::op_element res = info_.statement->array()[info_.root_idx].op; - if (res.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE - ||res.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE) - res.type = viennacl::scheduler::OPERATION_BINARY_ADD_TYPE; + scheduler::op_element res = info_.statement->array()[info_.root_idx].op; + if (res.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE + ||res.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE) + res.type = scheduler::OPERATION_BINARY_ADD_TYPE; return res; } }; diff --git a/atidlas/backend/templates/matrix_axpy.hpp b/atidlas/backend/templates/matrix_axpy.hpp index 5968920fd..fbb4cd575 100644 --- a/atidlas/backend/templates/matrix_axpy.hpp +++ b/atidlas/backend/templates/matrix_axpy.hpp @@ -5,9 +5,7 @@ #include #include "atidlas/backend/templates/template_base.hpp" - -#include "viennacl/scheduler/forwards.h" -#include "viennacl/tools/tools.hpp" +#include "atidlas/scheduler/forwards.h" namespace atidlas { @@ -28,7 +26,7 @@ public: class matrix_axpy_template : public template_base_impl { private: - int check_invalid_impl(viennacl::ocl::device const &, statements_container const &) const + int check_invalid_impl(cl::Device const &, statements_container const &) const { if (p_.simd_width>1) return TEMPLATE_INVALID_SIMD_WIDTH; @@ -99,14 +97,14 @@ public: std::vector input_sizes(statements_container const & statements) { - viennacl::scheduler::statement const & statement = statements.data().front(); + scheduler::statement const & statement = statements.data().front(); std::pair size = matrix_size(lhs_most(statement.array(), statement.root()), up_to_internal_size_); return tools::make_vector() << size.first << size.second; } void enqueue(std::string const & kernel_prefix, std::vector & programs, statements_container const & statements) { - viennacl::ocl::kernel & kernel = programs[0].program().get_kernel(kernel_prefix); + cl::Kernel & kernel = programs[0].program().get_kernel(kernel_prefix); kernel.local_work_size(0, p_.local_size_0); kernel.local_work_size(1, p_.local_size_1); @@ -119,7 +117,7 @@ public: kernel.arg(current_arg++, cl_uint(MN[1])); set_arguments(statements, kernel, current_arg); - viennacl::ocl::enqueue(kernel); +// viennacl::ocl::enqueue(kernel); } diff --git a/atidlas/backend/templates/matrix_product.hpp b/atidlas/backend/templates/matrix_product.hpp index 4ff863e10..a41245296 100644 --- a/atidlas/backend/templates/matrix_product.hpp +++ b/atidlas/backend/templates/matrix_product.hpp @@ -4,14 +4,9 @@ #include -#include "viennacl/scheduler/forwards.h" - -#include "viennacl/matrix_def.hpp" -#include "viennacl/matrix_proxy.hpp" -#include "viennacl/forwards.h" -#include "viennacl/tools/tools.hpp" - +#include "atidlas/forwards.h" #include "atidlas/backend/templates/template_base.hpp" +#include "atidlas/scheduler/forwards.h" #include "atidlas/tools/align.hpp" namespace atidlas @@ -50,8 +45,8 @@ class matrix_product_template : public template_base_impl - void enqueue_block(viennacl::scheduler::statement & statement, atidlas_int_t M, atidlas_int_t N, atidlas_int_t K, - viennacl::scheduler::lhs_rhs_element& eA, viennacl::scheduler::lhs_rhs_element& eB, viennacl::scheduler::lhs_rhs_element& eC, viennacl::scheduler::lhs_rhs_element& ebeta, + void enqueue_block(scheduler::statement & statement, atidlas_int_t M, atidlas_int_t N, atidlas_int_t K, + scheduler::lhs_rhs_element& eA, scheduler::lhs_rhs_element& eB, scheduler::lhs_rhs_element& eC, scheduler::lhs_rhs_element& ebeta, viennacl::matrix_base const & A, viennacl::matrix_base const & B, viennacl::matrix_base const & C, NumericT beta, std::vector & programs, std::string const & kernel_prefix, int id) { if (A.size1()==0 || A.size2()==0 || B.size1()==0 || B.size2()==0 || C.size1()==0 || C.size2()==0) return; - viennacl::ocl::kernel& kernel = programs[id].program().get_kernel(kernel_prefix); + cl::Kernel& kernel = programs[id].program().get_kernel(kernel_prefix); kernel.local_work_size(0, p_.local_size_0); kernel.local_work_size(1, p_.local_size_1); - viennacl::scheduler::statement::assign_element(eA, A); - viennacl::scheduler::statement::assign_element(eB, B); - viennacl::scheduler::statement::assign_element(eC, C); - viennacl::scheduler::statement::assign_element(ebeta, beta); + scheduler::statement::assign_element(eA, A); + scheduler::statement::assign_element(eB, B); + scheduler::statement::assign_element(eC, C); + scheduler::statement::assign_element(ebeta, beta); if (id==1) { @@ -691,12 +686,12 @@ private: kernel.arg(current_arg++, cl_uint(N)); kernel.arg(current_arg++, cl_uint(K)); set_arguments(statement, kernel, current_arg); - viennacl::ocl::enqueue(kernel); +// viennacl::ocl::enqueue(kernel); } template - viennacl::matrix_slice< viennacl::matrix_base > create_slice(viennacl::matrix_base* viennacl::scheduler::lhs_rhs_element::*ptr, viennacl::scheduler::lhs_rhs_element const & element, + viennacl::matrix_slice< viennacl::matrix_base > create_slice(viennacl::matrix_base* scheduler::lhs_rhs_element::*ptr, scheduler::lhs_rhs_element const & element, atidlas_int_t s0_0, atidlas_int_t s0_1, atidlas_int_t s1_0, atidlas_int_t s1_1, bool swap) { viennacl::matrix_base & M = *(element.*ptr); @@ -708,20 +703,20 @@ private: } template - void enqueue_impl(viennacl::matrix_base* viennacl::scheduler::lhs_rhs_element::*ptr_matrix, - viennacl::scheduler::statement & statement, atidlas_int_t M, atidlas_int_t N, atidlas_int_t K, - viennacl::scheduler::lhs_rhs_element & A, viennacl::scheduler::lhs_rhs_element & B, viennacl::scheduler::lhs_rhs_element & C, viennacl::scheduler::lhs_rhs_element & beta, + void enqueue_impl(viennacl::matrix_base* scheduler::lhs_rhs_element::*ptr_matrix, + scheduler::statement & statement, atidlas_int_t M, atidlas_int_t N, atidlas_int_t K, + scheduler::lhs_rhs_element & A, scheduler::lhs_rhs_element & B, scheduler::lhs_rhs_element & C, scheduler::lhs_rhs_element & beta, NumericT beta_value, std::vector & programs, std::string const & kernel_prefix) { using namespace tools; std::string kernel_prefix_fb = kernel_prefix + "_fb"; - atidlas_int_t ldstrideA = call_on_matrix(A, leading_stride_fun()); - atidlas_int_t ldstrideB = call_on_matrix(B, leading_stride_fun()); - atidlas_int_t ldstrideC = call_on_matrix(C, leading_stride_fun()); - atidlas_int_t ldstartA = call_on_matrix(A, leading_start_fun()); - atidlas_int_t ldstartB = call_on_matrix(B, leading_start_fun()); + atidlas_int_t ldstrideA = traits::ldstride(*A.matrix); + atidlas_int_t ldstrideB = traits::ldstride(*B.matrix); + atidlas_int_t ldstrideC = traits::ldstride(*C.matrix); + atidlas_int_t ldstartA = traits::ldstart(*A.matrix); + atidlas_int_t ldstartB = traits::ldstart(*B.matrix); bool swap_A = (A_trans_=='T'); bool swap_B = (B_trans_=='T'); @@ -735,9 +730,9 @@ private: } - viennacl::scheduler::lhs_rhs_element Acopy = A; - viennacl::scheduler::lhs_rhs_element Bcopy = B; - viennacl::scheduler::lhs_rhs_element Ccopy = C; + scheduler::lhs_rhs_element Acopy = A; + scheduler::lhs_rhs_element Bcopy = B; + scheduler::lhs_rhs_element Ccopy = C; atidlas_int_t lM = M / p_.mL * p_.mL; atidlas_int_t lN = N / p_.nL * p_.nL; @@ -764,14 +759,14 @@ private: { using namespace tools; - viennacl::scheduler::statement const & st = statements.data().front(); + scheduler::statement const & st = statements.data().front(); parse(st, C_idx, C_leaf, alpha_idx, alpha_leaf, A_idx, A_leaf, A_trans, B_idx, B_leaf, B_trans, beta_idx, beta_leaf); - viennacl::scheduler::lhs_rhs_element const & A = tools::lhs_rhs_element(st, A_idx, A_leaf); - viennacl::scheduler::lhs_rhs_element const & C = tools::lhs_rhs_element(st, C_idx, C_leaf); + scheduler::lhs_rhs_element const & A = tools::lhs_rhs_element(st, A_idx, A_leaf); + scheduler::lhs_rhs_element const & C = tools::lhs_rhs_element(st, C_idx, C_leaf); - atidlas_int_t M = call_on_matrix(C, size1_fun()); - atidlas_int_t N = call_on_matrix(C, size2_fun()); - atidlas_int_t K = A_trans?call_on_matrix(A, size1_fun()):call_on_matrix(A, size2_fun()); + atidlas_int_t M = traits::size1(*C.matrix); + atidlas_int_t N = traits::size2(*C.matrix); + atidlas_int_t K = A_trans?traits::size1(*A.matrix):traits::size2(*A.matrix); return tools::make_vector() << M << N << K; } @@ -796,16 +791,16 @@ public: leaf_t C_leaf=LHS_NODE_TYPE, A_leaf=LHS_NODE_TYPE, B_leaf=LHS_NODE_TYPE, alpha_leaf=LHS_NODE_TYPE, beta_leaf=LHS_NODE_TYPE; std::vector MNK = infos(statements,A_trans,B_trans,C_idx,A_idx,B_idx,alpha_idx,beta_idx,C_leaf,A_leaf,B_leaf,alpha_leaf,beta_leaf); - viennacl::scheduler::statement stcopy = statements.data().front(); - viennacl::scheduler::lhs_rhs_element& A = tools::lhs_rhs_element(stcopy, A_idx, A_leaf); - viennacl::scheduler::lhs_rhs_element& B = tools::lhs_rhs_element(stcopy, B_idx, B_leaf); - viennacl::scheduler::lhs_rhs_element& C = tools::lhs_rhs_element(stcopy, C_idx, C_leaf); - viennacl::scheduler::lhs_rhs_element& beta = tools::lhs_rhs_element(stcopy, beta_idx, beta_leaf); + scheduler::statement stcopy = statements.data().front(); + scheduler::lhs_rhs_element& A = tools::lhs_rhs_element(stcopy, A_idx, A_leaf); + scheduler::lhs_rhs_element& B = tools::lhs_rhs_element(stcopy, B_idx, B_leaf); + scheduler::lhs_rhs_element& C = tools::lhs_rhs_element(stcopy, C_idx, C_leaf); + scheduler::lhs_rhs_element& beta = tools::lhs_rhs_element(stcopy, beta_idx, beta_leaf); - if (C.numeric_type==viennacl::scheduler::FLOAT_TYPE) - enqueue_impl(&viennacl::scheduler::lhs_rhs_element::matrix_float, stcopy, MNK[0], MNK[1], MNK[2], A, B, C, beta, beta.host_float, programs, kernel_prefix); - else if (C.numeric_type==viennacl::scheduler::DOUBLE_TYPE) - enqueue_impl(&viennacl::scheduler::lhs_rhs_element::matrix_double, stcopy, MNK[0], MNK[1], MNK[2], A, B, C, beta, beta.host_double, programs, kernel_prefix); + if (C.numeric_type==scheduler::FLOAT_TYPE) + enqueue_impl(&scheduler::lhs_rhs_element::matrix_float, stcopy, MNK[0], MNK[1], MNK[2], A, B, C, beta, beta.host_float, programs, kernel_prefix); + else if (C.numeric_type==scheduler::DOUBLE_TYPE) + enqueue_impl(&scheduler::lhs_rhs_element::matrix_double, stcopy, MNK[0], MNK[1], MNK[2], A, B, C, beta, beta.host_double, programs, kernel_prefix); else throw generator_not_supported_exception("GEMM only supported for float/double"); diff --git a/atidlas/backend/templates/reduction.hpp b/atidlas/backend/templates/reduction.hpp index 531d1f595..4a1f1d7b2 100644 --- a/atidlas/backend/templates/reduction.hpp +++ b/atidlas/backend/templates/reduction.hpp @@ -4,10 +4,7 @@ #include -#include "viennacl/backend/opencl.hpp" -#include "viennacl/scheduler/forwards.h" -#include "viennacl/tools/tools.hpp" - +#include "atidlas/scheduler/forwards.h" #include "atidlas/backend/templates/template_base.hpp" namespace atidlas @@ -33,8 +30,8 @@ private: unsigned int res = 0; for(statements_container::data_type::const_iterator it = statements.data().begin() ; it != statements.data().end() ; ++it) { - viennacl::scheduler::statement const & statement = statements.data().front(); - viennacl::scheduler::statement_node_numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type; + scheduler::statement const & statement = statements.data().front(); + scheduler::numeric_type numeric_type = lhs_most(statement.array(), statement.root()).lhs.numeric_type; res += p_.local_size_0*tools::size_of(numeric_type); } return res; @@ -161,7 +158,7 @@ private: accessors["matrix_diag"] = str[a]; accessors["scalar"] = "#namereg"; std::string value = exprs[k]->evaluate_recursive(LHS_NODE_TYPE, accessors); - if (exprs[k]->root_node().op.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE) + if (exprs[k]->root_node().op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE) value+= "*" + exprs[k]->evaluate_recursive(RHS_NODE_TYPE, accessors); if (exprs[k]->is_index_reduction()) @@ -294,7 +291,7 @@ public: { std::vector size = input_sizes(statements); - std::vector reductions; + std::vector reductions; for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it) { std::vector reductions_idx = tools::filter_nodes(&tools::is_reduction, *it, false); @@ -302,7 +299,7 @@ public: reductions.push_back(&it->array()[*itt]); } - viennacl::scheduler::statement const & statement = statements.data().front(); + scheduler::statement const & statement = statements.data().front(); unsigned int scalartype_size = tools::size_of(lhs_most(statement.array(), statement.root()).lhs.numeric_type); viennacl::ocl::kernel * kernels[2]; @@ -329,7 +326,7 @@ public: kernels[k]->arg(n_arg++, cl_uint(size[0])); unsigned int i = 0; unsigned int j = 0; - for (std::vector::const_iterator it = reductions.begin(); it != reductions.end(); ++it) + for (std::vector::const_iterator it = reductions.begin(); it != reductions.end(); ++it) { if (tools::is_index_reduction((*it)->op)) { diff --git a/atidlas/backend/templates/row_wise_reduction.hpp b/atidlas/backend/templates/row_wise_reduction.hpp index 757cf3e29..a25ec9da1 100644 --- a/atidlas/backend/templates/row_wise_reduction.hpp +++ b/atidlas/backend/templates/row_wise_reduction.hpp @@ -4,10 +4,8 @@ #include -#include "viennacl/scheduler/forwards.h" -#include "viennacl/tools/tools.hpp" -#include "viennacl/scheduler/io.hpp" - +#include "atidlas/scheduler/forwards.h" +#include "atidlas/traits/size.hpp" #include "atidlas/backend/templates/template_base.hpp" namespace atidlas @@ -27,7 +25,7 @@ struct row_wise_reduction_parameters : public template_base::parameters_type class row_wise_reduction_template : public template_base_impl { private: - virtual int check_invalid_impl(viennacl::ocl::device const &, statements_container const &) const + virtual int check_invalid_impl(cl::Device const &, statements_container const &) const { if (p_.fetch_policy==FETCH_FROM_LOCAL) return TEMPLATE_INVALID_FETCHING_POLICY_TYPE; @@ -39,7 +37,7 @@ private: return p_.local_size_0*(p_.local_size_1+1); } - static void parse(viennacl::scheduler::statement const & statement, std::vector & idx, bool & is_trans, viennacl::scheduler::lhs_rhs_element & matrix) + static void parse(scheduler::statement const & statement, std::vector & idx, bool & is_trans, scheduler::lhs_rhs_element & matrix) { idx = tools::filter_nodes(&tools::is_reduction, statement, false); is_trans = is_node_trans(statement.array(), idx[0], LHS_NODE_TYPE); @@ -125,7 +123,7 @@ private: accessors["vector"] = str[a]; accessors["scalar"] = "#namereg"; std::string value = exprs[k]->evaluate_recursive(LHS_NODE_TYPE, accessors); - if (exprs[k]->root_node().op.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE) + if (exprs[k]->root_node().op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE) value+= "*" + exprs[k]->evaluate_recursive(RHS_NODE_TYPE, accessors); if (exprs[k]->is_index_reduction()) @@ -201,7 +199,7 @@ private: for (mit = mappings.begin(), sit = statements.data().begin(); mit != mappings.end(); ++mit, ++sit) { std::vector idx; - viennacl::scheduler::lhs_rhs_element A; + scheduler::lhs_rhs_element A; parse(*sit, idx, is_trans, A); for (unsigned int j = 0; j < idx.size(); ++j) exprs.push_back((mapped_row_wise_reduction*)(mit->at(mapping_key(idx[j], PARENT_NODE_TYPE)).get())); @@ -222,10 +220,10 @@ private: std::vector infos(statements_container const & statements, bool & is_trans) { std::vector idx; - viennacl::scheduler::lhs_rhs_element A; + scheduler::lhs_rhs_element A; parse(statements.data().front(), idx, is_trans, A); - atidlas_int_t M = tools::call_on_matrix(A, tools::size1_fun()); - atidlas_int_t N = tools::call_on_matrix(A, tools::size2_fun()); + atidlas_int_t M = traits::size1(*A.matrix); + atidlas_int_t N = traits::size2(*A.matrix); if(is_trans) std::swap(M,N); return tools::make_vector() << M << N; @@ -245,7 +243,7 @@ public: bool is_trans; std::vector MN = infos(statements, is_trans); - viennacl::ocl::kernel * kernel; + cl::Kernel * kernel; if(is_trans && p_.simd_width>1) { if (has_strided_access(statements)) @@ -264,7 +262,7 @@ public: kernel->arg(current_arg++, cl_uint(MN[0])); kernel->arg(current_arg++, cl_uint(MN[1])); set_arguments(statements, *kernel, current_arg); - viennacl::ocl::enqueue(*kernel); +// cl::CommandQueue().enqueue() } }; diff --git a/atidlas/backend/templates/template_base.hpp b/atidlas/backend/templates/template_base.hpp index e623a59be..3078f5764 100644 --- a/atidlas/backend/templates/template_base.hpp +++ b/atidlas/backend/templates/template_base.hpp @@ -5,13 +5,7 @@ #include #include -#include "viennacl/ocl/kernel.hpp" -#include "viennacl/ocl/device.hpp" -#include "viennacl/ocl/device_utils.hpp" - -#include "viennacl/scheduler/forwards.h" -#include "viennacl/scheduler/io.hpp" - +#include "atidlas/scheduler/forwards.h" #include "atidlas/tools/lazy_program_compiler.hpp" #include "atidlas/backend/templates/template_base.hpp" #include "atidlas/backend/tools/misc.hpp" @@ -45,94 +39,95 @@ private: /** @brief Functor to map the statements to the types defined in mapped_objects.hpp */ class map_functor : public tools::traversal_functor { - - viennacl::scheduler::statement_node_numeric_type numeric_type(viennacl::scheduler::statement const * statement, atidlas_int_t root_idx) const + numeric_type get_numeric_type(scheduler::statement const * statement, atidlas_int_t root_idx) const { - viennacl::scheduler::statement_node const * root_node = &statement->array()[root_idx]; - while (root_node->lhs.numeric_type==viennacl::scheduler::INVALID_NUMERIC_TYPE) + scheduler::statement_node const * root_node = &statement->array()[root_idx]; + while (root_node->lhs.numeric_t==INVALID_NUMERIC_TYPE) root_node = &statement->array()[root_node->lhs.node_index]; - return root_node->lhs.numeric_type; + return root_node->lhs.numeric_t; } - public: - typedef tools::shared_ptr result_type; - - map_functor(symbolic_binder & binder, mapping_type & mapping) : binder_(binder), mapping_(mapping){ } - /** @brief Binary leaf */ template - result_type binary_leaf(viennacl::scheduler::statement const * statement, atidlas_int_t root_idx, mapping_type const * mapping) const + tools::shared_ptr binary_leaf(scheduler::statement const * statement, atidlas_int_t root_idx, mapping_type const * mapping) const { - return result_type(new T(tools::numeric_type_to_string(numeric_type(statement,root_idx)), binder_.get(NULL), mapped_object::node_info(mapping, statement, root_idx))); + return tools::shared_ptr(new T(tools::numeric_type_to_string(get_numeric_type(statement,root_idx)), binder_.get(NULL), mapped_object::node_info(mapping, statement, root_idx))); } - template - result_type operator()(NumericT const & /*scalar*/) const - { - return result_type(new mapped_host_scalar(tools::type_to_string::value(), binder_.get(NULL))); - } +// template +// tools::shared_ptr operator()(NumericT const & /*scalar*/) const +// { +// return tools::shared_ptr(new mapped_host_scalar(tools::type_to_string::value(), binder_.get(NULL))); +// } - /** @brief Scalar mapping */ - template - result_type operator()(viennacl::scalar const & scal) const - { - return result_type(new mapped_scalar(tools::type_to_string::value(), binder_.get(&viennacl::traits::handle(scal)))); - } +// /** @brief Scalar mapping */ +// template +// tools::shared_ptr operator()(viennacl::scalar const & scal) const +// { +// return tools::shared_ptr(new mapped_scalar(tools::type_to_string::value(), binder_.get(&viennacl::traits::handle(scal)))); +// } /** @brief Vector mapping */ - template - result_type operator()(viennacl::vector_base const & vec) const + tools::shared_ptr create_vector(vector_base const & vector) const + { return tools::shared_ptr(new mapped_vector(tools::numeric_type_to_string(vector.dtype()), binder_.get(&vector.data()))); } + +// /** @brief Implicit vector mapping */ +// template +// tools::shared_ptr operator()(viennacl::implicit_vector_base const & /*vec*/) const +// { +// return tools::shared_ptr(new mapped_implicit_vector(tools::type_to_string::value(), binder_.get(NULL))); +// } + +// /** @brief Matrix mapping */ +// template +// tools::shared_ptr operator()(viennacl::matrix_base const & mat) const +// { +// return tools::shared_ptr(new mapped_matrix(tools::type_to_string::value(), binder_.get(&viennacl::traits::handle(mat)))); +// } + +// /** @brief Implicit matrix mapping */ +// template +// tools::shared_ptr operator()(viennacl::implicit_matrix_base const & /*mat*/) const +// { +// return tools::shared_ptr(new mapped_implicit_matrix(tools::type_to_string::value(), binder_.get(NULL))); +// } + + tools::shared_ptr create(scheduler::lhs_rhs_element const & lhs_rhs) const { - return result_type(new mapped_vector(tools::type_to_string::value(), binder_.get(&viennacl::traits::handle(vec)))); +// if(lhs_rhs.subtype==scheduler::DENSE_VECTOR_TYPE) + return create_vector(*lhs_rhs.vector); } - /** @brief Implicit vector mapping */ - template - result_type operator()(viennacl::implicit_vector_base const & /*vec*/) const - { - return result_type(new mapped_implicit_vector(tools::type_to_string::value(), binder_.get(NULL))); - } + public: - /** @brief Matrix mapping */ - template - result_type operator()(viennacl::matrix_base const & mat) const - { - return result_type(new mapped_matrix(tools::type_to_string::value(), binder_.get(&viennacl::traits::handle(mat)))); - } - - /** @brief Implicit matrix mapping */ - template - result_type operator()(viennacl::implicit_matrix_base const & /*mat*/) const - { - return result_type(new mapped_implicit_matrix(tools::type_to_string::value(), binder_.get(NULL))); - } + map_functor(symbolic_binder & binder, mapping_type & mapping) : binder_(binder), mapping_(mapping){ } /** @brief Traversal functor */ - void operator()(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const { + void operator()(scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const { mapping_type::key_type key(root_idx, leaf_t); - viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx]; + scheduler::statement_node const & root_node = statement.array()[root_idx]; - if (leaf_t == LHS_NODE_TYPE && root_node.lhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) - mapping_.insert(mapping_type::value_type(key, tools::call_on_element(root_node.lhs, *this))); - else if (leaf_t == RHS_NODE_TYPE && root_node.rhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) - mapping_.insert(mapping_type::value_type(key, tools::call_on_element(root_node.rhs, *this))); + if (leaf_t == LHS_NODE_TYPE && root_node.lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY) + mapping_.insert(mapping_type::value_type(key, create(root_node.lhs))); + else if (leaf_t == RHS_NODE_TYPE && root_node.rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY) + mapping_.insert(mapping_type::value_type(key, create(root_node.rhs))); else if ( leaf_t== PARENT_NODE_TYPE) { - if (root_node.op.type==viennacl::scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE) + if (root_node.op.type==scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE) mapping_.insert(mapping_type::value_type(key, binary_leaf(&statement, root_idx, &mapping_))); - else if (root_node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_DIAG_TYPE) + else if (root_node.op.type==scheduler::OPERATION_BINARY_MATRIX_DIAG_TYPE) mapping_.insert(mapping_type::value_type(key, binary_leaf(&statement, root_idx, &mapping_))); - else if (root_node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_ROW_TYPE) + else if (root_node.op.type==scheduler::OPERATION_BINARY_MATRIX_ROW_TYPE) mapping_.insert(mapping_type::value_type(key, binary_leaf(&statement, root_idx, &mapping_))); - else if (root_node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_COLUMN_TYPE) + else if (root_node.op.type==scheduler::OPERATION_BINARY_MATRIX_COLUMN_TYPE) mapping_.insert(mapping_type::value_type(key, binary_leaf(&statement, root_idx, &mapping_))); else if (is_scalar_reduction(root_node)) mapping_.insert(mapping_type::value_type(key, binary_leaf(&statement, root_idx, &mapping_))); else if (is_vector_reduction(root_node)) mapping_.insert(mapping_type::value_type(key, binary_leaf(&statement, root_idx, &mapping_))); - else if (root_node.op.type == viennacl::scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE) + else if (root_node.op.type == scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE) mapping_.insert(mapping_type::value_type(key, binary_leaf(&statement, root_idx, &mapping_))); - else if (root_node.op.type == viennacl::scheduler::OPERATION_UNARY_TRANS_TYPE) + else if (root_node.op.type == scheduler::OPERATION_UNARY_TRANS_TYPE) mapping_.insert(mapping_type::value_type(key, binary_leaf(&statement, root_idx, &mapping_))); } } @@ -148,86 +143,91 @@ private: public: typedef void result_type; - set_arguments_functor(symbolic_binder & binder, unsigned int & current_arg, viennacl::ocl::kernel & kernel) : binder_(binder), current_arg_(current_arg), kernel_(kernel){ } + set_arguments_functor(symbolic_binder & binder, unsigned int & current_arg, cl::Kernel & kernel) : binder_(binder), current_arg_(current_arg), kernel_(kernel){ } - template - result_type operator()(NumericT const & scal) const - { - typedef typename viennacl::result_of::cl_type::type cl_scalartype; - kernel_.arg(current_arg_++, cl_scalartype(scal)); - } +// template +// void operator()(NumericT const & scal) const +// { +// typedef typename viennacl::result_of::cl_type::type cl_scalartype; +// kernel_.arg(current_arg_++, cl_scalartype(scal)); +// } - /** @brief Scalar mapping */ - template - result_type operator()(viennacl::scalar const & scal) const - { - if (binder_.bind(&viennacl::traits::handle(scal))) - kernel_.arg(current_arg_++, scal.handle().opencl_handle()); - } +// /** @brief Scalar mapping */ +// template +// void operator()(viennacl::scalar const & scal) const +// { +// if (binder_.bind(&viennacl::traits::handle(scal))) +// kernel_.arg(current_arg_++, scal.handle().opencl_handle()); +// } /** @brief Vector mapping */ - template - result_type operator()(viennacl::vector_base const & vec) const + void set_vector_arguments(vector_base const & v) const { - if (binder_.bind(&viennacl::traits::handle(vec))) + if (binder_.bind(&v.data())) { - kernel_.arg(current_arg_++, vec.handle().opencl_handle()); - kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start(vec))); - kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride(vec))); + kernel_.setArg(current_arg_++, v.data()); + kernel_.setArg(current_arg_++, cl_uint(v.start())); + kernel_.setArg(current_arg_++, cl_uint(v.stride())); } } - /** @brief Implicit vector mapping */ - template - result_type operator()(viennacl::implicit_vector_base const & vec) const - { - typedef typename viennacl::result_of::cl_type::type cl_scalartype; - kernel_.arg(current_arg_++, cl_scalartype(vec.value())); - if (vec.has_index()) - kernel_.arg(current_arg_++, cl_uint(vec.index())); - } +// /** @brief Implicit vector mapping */ +// template +// void operator()(viennacl::implicit_vector_base const & vec) const +// { +// typedef typename viennacl::result_of::cl_type::type cl_scalartype; +// kernel_.arg(current_arg_++, cl_scalartype(vec.value())); +// if (vec.has_index()) +// kernel_.arg(current_arg_++, cl_uint(vec.index())); +// } - /** @brief Matrix mapping */ - template - result_type operator()(viennacl::matrix_base const & mat) const - { - if (binder_.bind(&viennacl::traits::handle(mat))) - { - kernel_.arg(current_arg_++, mat.handle().opencl_handle()); - kernel_.arg(current_arg_++, cl_uint(viennacl::traits::ld(mat))); - kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start1(mat))); - kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start2(mat))); - kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride1(mat))); - kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride2(mat))); - } - } +// /** @brief Matrix mapping */ +// template +// void operator()(viennacl::matrix_base const & mat) const +// { +// if (binder_.bind(&viennacl::traits::handle(mat))) +// { +// kernel_.arg(current_arg_++, mat.handle().opencl_handle()); +// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::ld(mat))); +// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start1(mat))); +// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start2(mat))); +// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride1(mat))); +// kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride2(mat))); +// } +// } - /** @brief Implicit matrix mapping */ - template - result_type operator()(viennacl::implicit_matrix_base const & mat) const +// /** @brief Implicit matrix mapping */ +// template +// void operator()(viennacl::implicit_matrix_base const & mat) const +// { +// kernel_.arg(current_arg_++, typename viennacl::result_of::cl_type::type(mat.value())); +// } + + void set_arguments(scheduler::lhs_rhs_element const & lhs_rhs) const { - kernel_.arg(current_arg_++, typename viennacl::result_of::cl_type::type(mat.value())); +// if(lhs_rhs.subtype==scheduler::DENSE_VECTOR_TYPE) + set_vector_arguments(*lhs_rhs.vector); } /** @brief Traversal functor: */ - void operator()(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const + void operator()(scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const { - viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx]; - if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) - tools::call_on_element(root_node.lhs, *this); - else if (leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) - tools::call_on_element(root_node.rhs, *this); + scheduler::statement_node const & root_node = statement.array()[root_idx]; + if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY) + set_arguments(root_node.lhs); + else if (leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY) + set_arguments(root_node.rhs); } private: symbolic_binder & binder_; unsigned int & current_arg_; - viennacl::ocl::kernel & kernel_; + cl::Kernel & kernel_; }; protected: - static inline void compute_reduction(tools::kernel_generation_stream & os, std::string acc, std::string cur, viennacl::scheduler::op_element const & op) + static inline void compute_reduction(tools::kernel_generation_stream & os, std::string acc, std::string cur, scheduler::op_element const & op) { if (tools::elementwise_function(op)) os << acc << "=" << tools::evaluate(op.type) << "(" << acc << "," << cur << ");" << std::endl; @@ -235,15 +235,15 @@ protected: os << acc << "= (" << acc << ")" << tools::evaluate(op.type) << "(" << cur << ");" << std::endl; } - static inline void compute_index_reduction(tools::kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, viennacl::scheduler::op_element const & op) + static inline void compute_index_reduction(tools::kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, scheduler::op_element const & op) { // os << acc << " = " << cur_value << ">" << acc_value << "?" << cur << ":" << acc << ";" << std::endl; os << acc << "= select(" << acc << "," << cur << "," << cur_value << ">" << acc_value << ");" << std::endl; os << acc_value << "="; - if (op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE) os << "fmax"; - if (op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE) os << "max"; - if (op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE) os << "fmin"; - if (op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE) os << "min"; + if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE) os << "fmax"; + if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE) os << "max"; + if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE) os << "fmin"; + if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE) os << "min"; os << "(" << acc_value << "," << cur_value << ");"<< std::endl; } @@ -269,27 +269,27 @@ protected: } } - static inline std::string neutral_element(viennacl::scheduler::op_element const & op) + static inline std::string neutral_element(scheduler::op_element const & op) { switch (op.type) { - case viennacl::scheduler::OPERATION_BINARY_ADD_TYPE : return "0"; - case viennacl::scheduler::OPERATION_BINARY_MULT_TYPE : return "1"; - case viennacl::scheduler::OPERATION_BINARY_DIV_TYPE : return "1"; - case viennacl::scheduler::OPERATION_BINARY_ELEMENT_FMAX_TYPE : return "-INFINITY"; - case viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE : return "-INFINITY"; - case viennacl::scheduler::OPERATION_BINARY_ELEMENT_MAX_TYPE : return "-INFINITY"; - case viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE : return "-INFINITY"; - case viennacl::scheduler::OPERATION_BINARY_ELEMENT_FMIN_TYPE : return "INFINITY"; - case viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE : return "INFINITY"; - case viennacl::scheduler::OPERATION_BINARY_ELEMENT_MIN_TYPE : return "INFINITY"; - case viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE : return "INFINITY"; + case scheduler::OPERATION_BINARY_ADD_TYPE : return "0"; + case scheduler::OPERATION_BINARY_MULT_TYPE : return "1"; + case scheduler::OPERATION_BINARY_DIV_TYPE : return "1"; + case scheduler::OPERATION_BINARY_ELEMENT_FMAX_TYPE : return "-INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE : return "-INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_MAX_TYPE : return "-INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE : return "-INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_FMIN_TYPE : return "INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE : return "INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_MIN_TYPE : return "INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE : return "INFINITY"; default: throw generator_not_supported_exception("Unsupported reduction operator : no neutral element known"); } } - static std::string generate_arguments(std::vector const & mappings, std::multimap const & accessors, statements_container const & statements) + static std::string generate_arguments(std::vector const & mappings, std::multimap const & accessors, scheduler::statements_container const & statements) { tools::kernel_generation_stream stream; tools::process(stream, PARENT_NODE_TYPE, accessors, statements, mappings); @@ -308,7 +308,7 @@ protected: return "__global " + data_type + "* #pointer, uint #start, uint #stride,"; } - static std::string generate_arguments(std::string const & data_type, std::vector const & mappings, statements_container const & statements) + static std::string generate_arguments(std::string const & data_type, std::vector const & mappings, scheduler::statements_container const & statements) { return generate_arguments(mappings, tools::create_process_accessors("scalar", "__global #scalartype* #pointer,") ("host_scalar", "#scalartype #name,") @@ -320,11 +320,11 @@ protected: - void set_arguments(statements_container const & statements, viennacl::ocl::kernel & kernel, unsigned int & current_arg) + void set_arguments(scheduler::statements_container const & statements, cl::Kernel & kernel, unsigned int & current_arg) { tools::shared_ptr binder = make_binder(binding_policy_); - for (statements_container::data_type::const_iterator itt = statements.data().begin(); itt != statements.data().end(); ++itt) - tools::traverse(*itt, itt->root(), set_arguments_functor(*binder,current_arg,kernel), true); + for (scheduler::statements_container::data_type::const_iterator itt = statements.data().begin(); itt != statements.data().end(); ++itt) + tools::traverse(*itt, itt->root(), set_arguments_functor(*binder, current_arg, kernel), true); } class invalid_template_exception : public std::exception @@ -364,18 +364,18 @@ protected: } } - static bool is_node_trans(viennacl::scheduler::statement::container_type const & array, size_t root_idx, leaf_t leaf_type) + static bool is_node_trans(scheduler::statement::container_type const & array, size_t root_idx, leaf_t leaf_type) { bool res = false; - viennacl::scheduler::lhs_rhs_element viennacl::scheduler::statement_node::*ptr; + scheduler::lhs_rhs_element scheduler::statement_node::*ptr; if (leaf_type==LHS_NODE_TYPE) - ptr = &viennacl::scheduler::statement_node::lhs; + ptr = &scheduler::statement_node::lhs; else - ptr = &viennacl::scheduler::statement_node::rhs; - viennacl::scheduler::statement_node const * node = &array[root_idx]; - while ((node->*ptr).type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) + ptr = &scheduler::statement_node::rhs; + scheduler::statement_node const * node = &array[root_idx]; + while ((node->*ptr).type_family==scheduler::COMPOSITE_OPERATION_FAMILY) { - if (array[(node->*ptr).node_index].op.type==viennacl::scheduler::OPERATION_UNARY_TRANS_TYPE) + if (array[(node->*ptr).node_index].op.type==scheduler::OPERATION_UNARY_TRANS_TYPE) res = !res; node = &array[(node->*ptr).node_index]; } @@ -392,28 +392,28 @@ protected: return str + tools::to_string(suffixes[i]); } - static bool is_offset_modifier(viennacl::scheduler::statement_node const & node) + static bool is_offset_modifier(scheduler::statement_node const & node) { - return node.op.type==viennacl::scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE - || node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_DIAG_TYPE - || node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_ROW_TYPE - || node.op.type==viennacl::scheduler::OPERATION_BINARY_MATRIX_COLUMN_TYPE; + return node.op.type==scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE + || node.op.type==scheduler::OPERATION_BINARY_MATRIX_DIAG_TYPE + || node.op.type==scheduler::OPERATION_BINARY_MATRIX_ROW_TYPE + || node.op.type==scheduler::OPERATION_BINARY_MATRIX_COLUMN_TYPE; } - static bool has_strided_access(statements_container const & statements) + static bool has_strided_access(scheduler::statements_container const & statements) { - for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it) + for (scheduler::statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it) { //checks for vectors - std::vector vectors = tools::filter_elements(viennacl::scheduler::DENSE_VECTOR_TYPE, *it); - for (std::vector::iterator itt = vectors.begin(); itt != vectors.end(); ++itt) - if (tools::call_on_vector(*itt, tools::stride_fun())>1) + std::vector vectors = tools::filter_elements(scheduler::DENSE_VECTOR_TYPE, *it); + for (std::vector::iterator itt = vectors.begin(); itt != vectors.end(); ++itt) + if(itt->vector->stride()) return true; //checks for matrix - std::vector matrices = tools::filter_elements(viennacl::scheduler::DENSE_MATRIX_TYPE, *it); - for (std::vector::iterator itt = matrices.begin(); itt != matrices.end(); ++itt) - if (tools::call_on_matrix(*itt, tools::stride1_fun())>1 || tools::call_on_matrix(*itt, tools::stride2_fun())>2) + std::vector matrices = tools::filter_elements(scheduler::DENSE_MATRIX_TYPE, *it); + for (std::vector::iterator itt = matrices.begin(); itt != matrices.end(); ++itt) + if (itt->matrix->stride1() > 1 || itt->matrix->stride2() > 1) return true; if(tools::filter_nodes(&is_offset_modifier, *it, true).empty()==false) @@ -422,42 +422,42 @@ protected: return false; } - static atidlas_int_t vector_size(viennacl::scheduler::statement_node const & node, bool up_to_internal_size) + static atidlas_int_t vector_size(scheduler::statement_node const & node, bool up_to_internal_size) { - using namespace viennacl::scheduler; + using namespace scheduler; using namespace tools; + + atidlas_int_t (vector_base::*funsize)(void) const = up_to_internal_size?&vector_base::internal_size:&vector_base::size; + atidlas_int_t (matrix_base::*funsize1)(void) const = up_to_internal_size?&matrix_base::internal_size1:&matrix_base::size1; + atidlas_int_t (matrix_base::*funsize2)(void) const = up_to_internal_size?&matrix_base::internal_size2:&matrix_base::size2; + if (node.op.type==OPERATION_BINARY_MATRIX_DIAG_TYPE) - { - atidlas_int_t size1 = up_to_internal_size?call_on_matrix(node.lhs, internal_size1_fun()):call_on_matrix(node.lhs, size1_fun()); - atidlas_int_t size2 = up_to_internal_size?call_on_matrix(node.lhs, internal_size2_fun()):call_on_matrix(node.lhs, size2_fun()); - return std::min(size1, size2); - } + return std::min((node.lhs.matrix->*funsize1)(), (node.lhs.matrix->*funsize2)()); else if (node.op.type==OPERATION_BINARY_MATRIX_ROW_TYPE) - return up_to_internal_size?call_on_matrix(node.lhs, internal_size2_fun()):call_on_matrix(node.lhs, size2_fun()); + return (node.lhs.matrix->*funsize2)(); else if (node.op.type==OPERATION_BINARY_MATRIX_COLUMN_TYPE) - return up_to_internal_size?call_on_matrix(node.lhs, internal_size1_fun()):call_on_matrix(node.lhs, size1_fun()); + return (node.lhs.matrix->*funsize1)(); else - return up_to_internal_size?call_on_vector(node.lhs, internal_size_fun()):call_on_vector(node.lhs, size_fun()); + return (node.lhs.vector->*funsize)(); + } - static std::pair matrix_size(viennacl::scheduler::statement_node const & node, bool up_to_internal_size) + static std::pair matrix_size(scheduler::statement_node const & node, bool up_to_internal_size) { using namespace tools; - if (node.op.type==viennacl::scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE) + atidlas_int_t (vector_base::*funsize)() const = up_to_internal_size?&vector_base::internal_size:&vector_base::size; + atidlas_int_t (matrix_base::*funsize1)() const = up_to_internal_size?&matrix_base::internal_size1:&matrix_base::size1; + atidlas_int_t (matrix_base::*funsize2)() const = up_to_internal_size?&matrix_base::internal_size2:&matrix_base::size2; + + if (node.op.type==scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE) { - atidlas_int_t is = call_on_vector(node.lhs, internal_size_fun()); - atidlas_int_t s = call_on_vector(node.lhs, size_fun()); - return up_to_internal_size?std::make_pair(is,is):std::make_pair(s,s); + atidlas_int_t size = (node.lhs.vector->*funsize)(); + return std::make_pair(size,size); } else - { - atidlas_int_t size1 = up_to_internal_size?call_on_matrix(node.lhs, internal_size1_fun()):call_on_matrix(node.lhs, size1_fun()); - atidlas_int_t size2 = up_to_internal_size?call_on_matrix(node.lhs, internal_size2_fun()):call_on_matrix(node.lhs, size2_fun()); - return std::make_pair(size1, size2); - } + return std::make_pair((node.lhs.matrix->*funsize1)(), (node.lhs.matrix->*funsize2)()); } - //NB : templates are not used here because declaring a functor out of the generate() functions would be harder to read struct loop_body_base { virtual void operator()(tools::kernel_generation_stream & stream, unsigned int simd_width) const = 0; @@ -507,22 +507,22 @@ protected: private: /** @brief Generates the body of the associated kernel function */ - virtual std::vector generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector const & mapping) const = 0; + virtual std::vector generate_impl(std::string const & kernel_prefix, scheduler::statements_container const & statements, std::vector const & mapping) const = 0; public: template_base(binding_policy_t binding_policy) : binding_policy_(binding_policy) {} - virtual unsigned int lmem_usage(statements_container const &) const { return 0; } + virtual unsigned int lmem_usage(scheduler::statements_container const &) const { return 0; } - virtual unsigned int registers_usage(statements_container const &) const { return 0; } + virtual unsigned int registers_usage(scheduler::statements_container const &) const { return 0; } - virtual std::vector input_sizes(statements_container const & statements) = 0; + virtual std::vector input_sizes(scheduler::statements_container const & statements) = 0; virtual ~template_base(){ } - std::vector generate(std::string const & kernel_prefix, statements_container const & statements, viennacl::ocl::device const & device) + std::vector generate(std::string const & kernel_prefix, scheduler::statements_container const & statements, cl::Device const & device) { - statements_container::data_type::const_iterator sit; + scheduler::statements_container::data_type::const_iterator sit; std::vector::iterator mit; if(int err = check_invalid(statements, device)) @@ -538,9 +538,9 @@ public: } /** @brief returns whether or not the profile has undefined behavior on particular device */ - virtual int check_invalid(statements_container const & statements, viennacl::ocl::device const & device) const = 0; + virtual int check_invalid(scheduler::statements_container const & statements, cl::Device const & device) const = 0; - virtual void enqueue(std::string const & kernel_prefix, std::vector & programs, statements_container const & statements) = 0; + virtual void enqueue(std::string const & kernel_prefix, std::vector & programs, scheduler::statements_container const & statements) = 0; virtual tools::shared_ptr clone() const = 0; @@ -553,23 +553,23 @@ template class template_base_impl : public template_base { private: - virtual int check_invalid_impl(viennacl::ocl::device const &, statements_container const &) const { return TEMPLATE_VALID; } + virtual int check_invalid_impl(cl::Device const &, scheduler::statements_container const &) const { return TEMPLATE_VALID; } protected: - bool has_misaligned_offset(statements_container const & statements) + bool has_misaligned_offset(scheduler::statements_container const & statements) { - for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it) + for (scheduler::statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it) { //checks for vectors - std::vector vectors = tools::filter_elements(viennacl::scheduler::DENSE_VECTOR_TYPE, *it); - for (std::vector::iterator itt = vectors.begin(); itt != vectors.end(); ++itt) - if (tools::call_on_vector(*itt, tools::stride_fun())>1) + std::vector vectors = tools::filter_elements(scheduler::DENSE_VECTOR_TYPE, *it); + for (std::vector::iterator itt = vectors.begin(); itt != vectors.end(); ++itt) + if (itt->vector->stride()>1) return true; //checks for matrix - std::vector matrices = tools::filter_elements(viennacl::scheduler::DENSE_MATRIX_TYPE, *it); - for (std::vector::iterator itt = matrices.begin(); itt != matrices.end(); ++itt) - if (tools::call_on_matrix(*itt, tools::stride1_fun())>1 || tools::call_on_matrix(*itt, tools::stride2_fun())>2) + std::vector matrices = tools::filter_elements(scheduler::DENSE_MATRIX_TYPE, *it); + for (std::vector::iterator itt = matrices.begin(); itt != matrices.end(); ++itt) + if (itt->matrix->stride1()>1 || itt->matrix->stride2()>1) return true; } return false; @@ -588,19 +588,17 @@ public: { return tools::shared_ptr(new TemplateType(*dynamic_cast(this))); } /** @brief returns whether or not the profile has undefined behavior on particular device */ - int check_invalid(statements_container const & statements, viennacl::ocl::device const & device) const + int check_invalid(scheduler::statements_container const & statements, cl::Device const & device) const { - using namespace viennacl::tools; - //Query device informations - size_t lmem_available = static_cast(device.local_mem_size()); + size_t lmem_available = device.getInfo(); size_t lmem_used = lmem_usage(statements); if (lmem_used>lmem_available) return TEMPLATE_LOCAL_MEMORY_OVERFLOW; //Invalid work group size - size_t max_workgroup_size = device.max_work_group_size(); - std::vector max_work_item_sizes = device.max_work_item_sizes(); + size_t max_workgroup_size = device.getInfo(); + std::vector max_work_item_sizes = device.getInfo(); if (p_.local_size_0*p_.local_size_1 > max_workgroup_size) return TEMPLATE_WORK_GROUP_SIZE_OVERFLOW; if (p_.local_size_0 > max_work_item_sizes[0]) @@ -611,12 +609,12 @@ public: //Advice from the Intel guide unsigned int warp_size = 8; - if (device.type()==CL_DEVICE_TYPE_GPU) + if (device.getInfo()==CL_DEVICE_TYPE_GPU) { //Advice from the nvidia guide warp_size = 32; //Advice from the AMD guide - if (device.vendor_id()==4098) + if (device.getInfo()==4098) warp_size = 64; } if (((p_.local_size_0*p_.local_size_1)%warp_size)>0) diff --git a/atidlas/backend/templates/vector_axpy.hpp b/atidlas/backend/templates/vector_axpy.hpp index f5a04ffb3..2ac2cb315 100644 --- a/atidlas/backend/templates/vector_axpy.hpp +++ b/atidlas/backend/templates/vector_axpy.hpp @@ -4,9 +4,7 @@ #include #include -#include "viennacl/scheduler/forwards.h" -#include "viennacl/tools/tools.hpp" - +#include "atidlas/scheduler/forwards.h" #include "atidlas/backend/templates/template_base.hpp" namespace atidlas @@ -25,14 +23,14 @@ public: class vector_axpy_template : public template_base_impl { private: - virtual int check_invalid_impl(viennacl::ocl::device const &, statements_container const &) const + virtual int check_invalid_impl(cl::Device const &, scheduler::statements_container const &) const { if (p_.fetching_policy==FETCH_FROM_LOCAL) return TEMPLATE_INVALID_FETCHING_POLICY_TYPE; return TEMPLATE_VALID; } - std::vector generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector const & mappings) const + std::vector generate_impl(std::string const & kernel_prefix, scheduler::statements_container const & statements, std::vector const & mappings) const { std::vector result; for (unsigned int i = 0; i < 2; ++i) @@ -95,28 +93,28 @@ public: void up_to_internal_size(bool v) { up_to_internal_size_ = v; } - std::vector input_sizes(statements_container const & statements) + std::vector input_sizes(scheduler::statements_container const & statements) { - viennacl::scheduler::statement const & statement = statements.data().front(); + scheduler::statement const & statement = statements.data().front(); atidlas_int_t size = vector_size(lhs_most(statement.array(), statement.root()), up_to_internal_size_); return tools::make_vector() << size; } - void enqueue(std::string const & kernel_prefix, std::vector & programs, statements_container const & statements) + void enqueue(std::string const & kernel_prefix, std::vector & programs, scheduler::statements_container const & statements) { - atidlas_int_t size = input_sizes(statements)[0]; - std::string kfallback = kernel_prefix; - kfallback+='0'; - std::string kopt = kernel_prefix; - kopt+='1'; - bool fallback = p_.simd_width > 1 && (has_strided_access(statements) || (size%p_.simd_width>0) || has_misaligned_offset(statements)); - viennacl::ocl::kernel * kernel = &programs[fallback?0:1].program().get_kernel(fallback?kfallback:kopt); - kernel->local_work_size(0, p_.local_size_0); - kernel->global_work_size(0, p_.local_size_0*p_.num_groups); - unsigned int current_arg = 0; - kernel->arg(current_arg++, static_cast(size)); - set_arguments(statements, *kernel, current_arg); - viennacl::ocl::enqueue(*kernel); +// atidlas_int_t size = input_sizes(statements)[0]; +// std::string kfallback = kernel_prefix; +// kfallback+='0'; +// std::string kopt = kernel_prefix; +// kopt+='1'; +// bool fallback = p_.simd_width > 1 && (has_strided_access(statements) || (size%p_.simd_width>0) || has_misaligned_offset(statements)); +// cl::Kernel * kernel = &programs[fallback?0:1].program().get_kernel(fallback?kfallback:kopt); +// kernel->local_work_size(0, p_.local_size_0); +// kernel->global_work_size(0, p_.local_size_0*p_.num_groups); +// unsigned int current_arg = 0; +// kernel->arg(current_arg++, static_cast(size)); +// set_arguments(statements, *kernel, current_arg); +// cl::CommandQueue().enqueueNDRangeKernel(kernel); } private: diff --git a/atidlas/backend/tools/misc.hpp b/atidlas/backend/tools/misc.hpp index 24654a852..b8c718a44 100644 --- a/atidlas/backend/tools/misc.hpp +++ b/atidlas/backend/tools/misc.hpp @@ -4,377 +4,56 @@ #include -#include "viennacl/matrix_def.hpp" -#include "viennacl/vector_def.hpp" - -#include "viennacl/ocl/forwards.h" - -#include "viennacl/scheduler/forwards.h" - -#include "viennacl/traits/size.hpp" -#include "viennacl/traits/handle.hpp" - #include "atidlas/tools/to_string.hpp" #include "atidlas/forwards.h" #include "atidlas/backend/forwards.h" +#include "atidlas/scheduler/forwards.h" +#include "atidlas/tools/find_and_replace.hpp" + namespace atidlas { namespace tools { -template -T median(std::vector x) +inline std::string numeric_type_to_string(numeric_type const & type) { - size_t size = x.size(); - std::sort(x.begin(), x.end()); - if (size % 2 == 0) - return (x[size / 2 - 1] + x[size / 2]) / 2; - else - return x[size / 2]; -} - -template -class make_vector { -public: - typedef make_vector my_type; - my_type& operator<< (const T& val) { - data_.push_back(val); - return *this; - } - operator std::vector() const { - return data_; - } -private: - std::vector data_; -}; - -//CUDA Conversion -inline std::string opencl_source_to_cuda_source(std::string const & opencl_src) -{ - std::string res = opencl_src; - - viennacl::tools::find_and_replace(res,"__attribute__","//__attribute__"); - - //Pointer - viennacl::tools::find_and_replace(res, "__global float*", "float*"); - viennacl::tools::find_and_replace(res, "__local float*", "float*"); - - viennacl::tools::find_and_replace(res, "__global double*", "double*"); - viennacl::tools::find_and_replace(res, "__local double*", "double*"); - - //Qualifiers - viennacl::tools::find_and_replace(res,"__global","__device__"); - viennacl::tools::find_and_replace(res,"__kernel","__global__"); - viennacl::tools::find_and_replace(res,"__constant","__constant__"); - viennacl::tools::find_and_replace(res,"__local","__shared__"); - - //Indexing - viennacl::tools::find_and_replace(res,"get_num_groups(0)","gridDim.x"); - viennacl::tools::find_and_replace(res,"get_num_groups(1)","gridDim.y"); - - viennacl::tools::find_and_replace(res,"get_local_size(0)","blockDim.x"); - viennacl::tools::find_and_replace(res,"get_local_size(1)","blockDim.y"); - - viennacl::tools::find_and_replace(res,"get_group_id(0)","blockIdx.x"); - viennacl::tools::find_and_replace(res,"get_group_id(1)","blockIdx.y"); - - viennacl::tools::find_and_replace(res,"get_local_id(0)","threadIdx.x"); - viennacl::tools::find_and_replace(res,"get_local_id(1)","threadIdx.y"); - - viennacl::tools::find_and_replace(res,"get_global_id(0)","(blockIdx.x*blockDim.x + threadIdx.x)"); - viennacl::tools::find_and_replace(res,"get_global_id(1)","(blockIdx.y*blockDim.y + threadIdx.y)"); - - //Synchronization - viennacl::tools::find_and_replace(res,"barrier(CLK_LOCAL_MEM_FENCE)","__syncthreads()"); - viennacl::tools::find_and_replace(res,"barrier(CLK_GLOBAL_MEM_FENCE)","__syncthreads()"); - - - return res; -} - -static std::string numeric_type_to_string(viennacl::scheduler::statement_node_numeric_type const & type){ switch (type) { - //case viennacl::scheduler::CHAR_TYPE: return "char"; - //case viennacl::scheduler::UCHAR_TYPE: return "unsigned char"; - //case viennacl::scheduler::SHORT_TYPE: return "short"; - //case viennacl::scheduler::USHORT_TYPE: return "unsigned short"; - case viennacl::scheduler::INT_TYPE: return "int"; - case viennacl::scheduler::UINT_TYPE: return "unsigned int"; - case viennacl::scheduler::LONG_TYPE: return "long"; - case viennacl::scheduler::ULONG_TYPE: return "unsigned long"; - case viennacl::scheduler::FLOAT_TYPE : return "float"; - case viennacl::scheduler::DOUBLE_TYPE : return "double"; + //case CHAR_TYPE: return "char"; + //case UCHAR_TYPE: return "uchar"; + //case SHORT_TYPE: return "short"; + //case USHORT_TYPE: return "ushort"; + case INT_TYPE: return "int"; + case UINT_TYPE: return "uint"; + case LONG_TYPE: return "long"; + case ULONG_TYPE: return "ulong"; + case FLOAT_TYPE : return "float"; + case DOUBLE_TYPE : return "double"; default : throw generator_not_supported_exception("Unsupported Scalartype"); } } - -template -static typename Fun::result_type call_on_host_scalar(viennacl::scheduler::lhs_rhs_element element, Fun const & fun){ - assert(element.type_family == viennacl::scheduler::SCALAR_TYPE_FAMILY && bool("Must be called on a host scalar")); - switch (element.numeric_type) - { - //case viennacl::scheduler::CHAR_TYPE: return fun(element.host_char); - //case viennacl::scheduler::UCHAR_TYPE: return fun(element.host_uchar); - //case viennacl::scheduler::SHORT_TYPE: return fun(element.host_short); - //case viennacl::scheduler::USHORT_TYPE: return fun(element.host_ushort); - case viennacl::scheduler::INT_TYPE: return fun(element.host_int); - case viennacl::scheduler::UINT_TYPE: return fun(element.host_uint); - case viennacl::scheduler::LONG_TYPE: return fun(element.host_long); - case viennacl::scheduler::ULONG_TYPE: return fun(element.host_ulong); - case viennacl::scheduler::FLOAT_TYPE : return fun(element.host_float); - case viennacl::scheduler::DOUBLE_TYPE : return fun(element.host_double); - default : throw generator_not_supported_exception("Unsupported Scalartype"); - } -} - -template -static typename Fun::result_type call_on_scalar(viennacl::scheduler::lhs_rhs_element element, Fun const & fun){ - assert(element.type_family == viennacl::scheduler::SCALAR_TYPE_FAMILY && bool("Must be called on a scalar")); - switch (element.numeric_type) - { - //case viennacl::scheduler::CHAR_TYPE: return fun(*element.scalar_char); - //case viennacl::scheduler::UCHAR_TYPE: return fun(*element.scalar_uchar); - //case viennacl::scheduler::SHORT_TYPE: return fun(*element.scalar_short); - //case viennacl::scheduler::USHORT_TYPE: return fun(*element.scalar_ushort); - case viennacl::scheduler::INT_TYPE: return fun(*element.scalar_int); - case viennacl::scheduler::UINT_TYPE: return fun(*element.scalar_uint); - case viennacl::scheduler::LONG_TYPE: return fun(*element.scalar_long); - case viennacl::scheduler::ULONG_TYPE: return fun(*element.scalar_ulong); - case viennacl::scheduler::FLOAT_TYPE : return fun(*element.scalar_float); - case viennacl::scheduler::DOUBLE_TYPE : return fun(*element.scalar_double); - default : throw generator_not_supported_exception("Unsupported Scalartype"); - } -} - -template -static typename Fun::result_type call_on_vector(viennacl::scheduler::lhs_rhs_element element, Fun const & fun){ - assert(element.type_family == viennacl::scheduler::VECTOR_TYPE_FAMILY && bool("Must be called on a vector")); - switch (element.numeric_type) - { - //case viennacl::scheduler::CHAR_TYPE: return fun(*element.vector_char); - //case viennacl::scheduler::UCHAR_TYPE: return fun(*element.vector_uchar); - //case viennacl::scheduler::SHORT_TYPE: return fun(*element.vector_short); - //case viennacl::scheduler::USHORT_TYPE: return fun(*element.vector_ushort); - case viennacl::scheduler::INT_TYPE: return fun(*element.vector_int); - case viennacl::scheduler::UINT_TYPE: return fun(*element.vector_uint); - case viennacl::scheduler::LONG_TYPE: return fun(*element.vector_long); - case viennacl::scheduler::ULONG_TYPE: return fun(*element.vector_ulong); - case viennacl::scheduler::FLOAT_TYPE : return fun(*element.vector_float); - case viennacl::scheduler::DOUBLE_TYPE : return fun(*element.vector_double); - default : throw generator_not_supported_exception("Unsupported Scalartype"); - } -} - -template -static typename Fun::result_type call_on_implicit_vector(viennacl::scheduler::lhs_rhs_element element, Fun const & fun){ - assert(element.type_family == viennacl::scheduler::VECTOR_TYPE_FAMILY && bool("Must be called on a implicit_vector")); - assert(element.subtype == viennacl::scheduler::IMPLICIT_VECTOR_TYPE && bool("Must be called on a implicit_vector")); - switch (element.numeric_type) - { - //case viennacl::scheduler::CHAR_TYPE: return fun(*element.implicit_vector_char); - //case viennacl::scheduler::UCHAR_TYPE: return fun(*element.implicit_vector_uchar); - //case viennacl::scheduler::SHORT_TYPE: return fun(*element.implicit_vector_short); - //case viennacl::scheduler::USHORT_TYPE: return fun(*element.implicit_vector_ushort); - case viennacl::scheduler::INT_TYPE: return fun(*element.implicit_vector_int); - case viennacl::scheduler::UINT_TYPE: return fun(*element.implicit_vector_uint); - case viennacl::scheduler::LONG_TYPE: return fun(*element.implicit_vector_long); - case viennacl::scheduler::ULONG_TYPE: return fun(*element.implicit_vector_ulong); - case viennacl::scheduler::FLOAT_TYPE : return fun(*element.implicit_vector_float); - case viennacl::scheduler::DOUBLE_TYPE : return fun(*element.implicit_vector_double); - default : throw generator_not_supported_exception("Unsupported Scalartype"); - } -} - -template -static typename Fun::result_type call_on_matrix(viennacl::scheduler::lhs_rhs_element element, Fun const & fun){ - assert(element.type_family == viennacl::scheduler::MATRIX_TYPE_FAMILY && bool("Must be called on a matrix")); - switch (element.numeric_type) - { - //case viennacl::scheduler::CHAR_TYPE: return fun(*element.matrix_char); - //case viennacl::scheduler::UCHAR_TYPE: return fun(*element.matrix_uchar); - //case viennacl::scheduler::SHORT_TYPE: return fun(*element.matrix_short); - //case viennacl::scheduler::USHORT_TYPE: return fun(*element.matrix_ushort); - case viennacl::scheduler::INT_TYPE: return fun(*element.matrix_int); - case viennacl::scheduler::UINT_TYPE: return fun(*element.matrix_uint); - case viennacl::scheduler::LONG_TYPE: return fun(*element.matrix_long); - case viennacl::scheduler::ULONG_TYPE: return fun(*element.matrix_ulong); - case viennacl::scheduler::FLOAT_TYPE : return fun(*element.matrix_float); - case viennacl::scheduler::DOUBLE_TYPE : return fun(*element.matrix_double); - default : throw generator_not_supported_exception("Unsupported Scalartype"); - } -} - - -template -static typename Fun::result_type call_on_implicit_matrix(viennacl::scheduler::lhs_rhs_element element, Fun const & fun){ - assert(element.subtype == viennacl::scheduler::IMPLICIT_MATRIX_TYPE && bool("Must be called on a implicit matrix")); - switch (element.numeric_type) - { - //case viennacl::scheduler::CHAR_TYPE: return fun(*element.implicit_matrix_char); - //case viennacl::scheduler::UCHAR_TYPE: return fun(*element.implicit_matrix_uchar); - //case viennacl::scheduler::SHORT_TYPE: return fun(*element.implicit_matrix_short); - //case viennacl::scheduler::USHORT_TYPE: return fun(*element.implicit_matrix_ushort); - case viennacl::scheduler::INT_TYPE: return fun(*element.implicit_matrix_int); - case viennacl::scheduler::UINT_TYPE: return fun(*element.implicit_matrix_uint); - case viennacl::scheduler::LONG_TYPE: return fun(*element.implicit_matrix_long); - case viennacl::scheduler::ULONG_TYPE: return fun(*element.implicit_matrix_ulong); - case viennacl::scheduler::FLOAT_TYPE : return fun(*element.implicit_matrix_float); - case viennacl::scheduler::DOUBLE_TYPE : return fun(*element.implicit_matrix_double); - default : throw generator_not_supported_exception("Unsupported Scalartype"); - } -} - -template -static typename Fun::result_type call_on_element(viennacl::scheduler::lhs_rhs_element const & element, Fun const & fun){ - switch (element.type_family) - { - case viennacl::scheduler::SCALAR_TYPE_FAMILY: - if (element.subtype == viennacl::scheduler::HOST_SCALAR_TYPE) - return call_on_host_scalar(element, fun); - else - return call_on_scalar(element, fun); - case viennacl::scheduler::VECTOR_TYPE_FAMILY : - if (element.subtype == viennacl::scheduler::IMPLICIT_VECTOR_TYPE) - return call_on_implicit_vector(element, fun); - else - return call_on_vector(element, fun); - case viennacl::scheduler::MATRIX_TYPE_FAMILY: - if (element.subtype == viennacl::scheduler::IMPLICIT_MATRIX_TYPE) - return call_on_implicit_matrix(element, fun); - else - return call_on_matrix(element,fun); - default: - throw generator_not_supported_exception("Unsupported datastructure type : Not among {Scalar, Vector, Matrix}"); - } -} - -struct scalartype_size_fun -{ - typedef atidlas_int_t result_type; - result_type operator()(float const &) const { return sizeof(float); } - result_type operator()(double const &) const { return sizeof(double); } - template result_type operator()(T const &) const { return sizeof(typename viennacl::result_of::cpu_value_type::type); } -}; - -struct internal_size_fun -{ - typedef atidlas_int_t result_type; - template result_type operator()(T const &t) const { return viennacl::traits::internal_size(t); } -}; - -struct size_fun -{ - typedef atidlas_int_t result_type; - template result_type operator()(T const &t) const { return viennacl::traits::size(t); } -}; - -struct start_fun -{ - typedef atidlas_int_t result_type; - template result_type operator()(T const &t) const { return viennacl::traits::start(t); } -}; - - -struct stride_fun -{ - typedef atidlas_int_t result_type; - template result_type operator()(T const &t) const { return viennacl::traits::stride(t); } -}; - -struct start1_fun -{ - typedef atidlas_int_t result_type; - template result_type operator()(T const &t) const { return viennacl::traits::start1(t); } -}; - -struct start2_fun -{ - typedef atidlas_int_t result_type; - template result_type operator()(T const &t) const { return viennacl::traits::start2(t); } -}; - -struct leading_stride_fun -{ - typedef atidlas_int_t result_type; - template result_type operator()(T const &t) const { return viennacl::traits::stride1(t); } -}; - -struct leading_start_fun -{ - typedef atidlas_int_t result_type; - template result_type operator()(T const &t) const { return viennacl::traits::start1(t); } -}; - -struct stride1_fun -{ - typedef atidlas_int_t result_type; - template result_type operator()(T const &t) const { return viennacl::traits::stride1(t); } -}; - -struct stride2_fun -{ - typedef atidlas_int_t result_type; - template result_type operator()(T const &t) const { return viennacl::traits::stride2(t); } -}; - -struct handle_fun -{ - typedef cl_mem result_type; - template - result_type operator()(T const &t) const { return viennacl::traits::opencl_handle(t); } -}; - -struct internal_size1_fun -{ - typedef atidlas_int_t result_type; - template - result_type operator()(T const &t) const { return viennacl::traits::internal_size1(t); } -}; - -struct internal_size2_fun -{ - typedef atidlas_int_t result_type; - template - result_type operator()(T const &t) const { return viennacl::traits::internal_size2(t); } -}; - -struct size1_fun -{ - typedef atidlas_int_t result_type; - template - result_type operator()(T const &t) const { return viennacl::traits::size1(t); } -}; - -struct size2_fun -{ - typedef atidlas_int_t result_type; - template - result_type operator()(T const &t) const { return viennacl::traits::size2(t); } -}; - template struct is_same_type { enum { value = 0 }; }; template struct is_same_type { enum { value = 1 }; }; -inline bool is_reduction(viennacl::scheduler::statement_node const & node) +inline bool is_reduction(scheduler::statement_node const & node) { - return node.op.type_family==viennacl::scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY - || node.op.type_family==viennacl::scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY - || node.op.type_family==viennacl::scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY - || node.op.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE - || node.op.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE; + return node.op.type_family==scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY + || node.op.type_family==scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY + || node.op.type_family==scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY + || node.op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE + || node.op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE; } -inline bool is_index_reduction(viennacl::scheduler::op_element const & op) +inline bool is_index_reduction(scheduler::op_element const & op) { - return op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE - || op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE - || op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE - || op.type==viennacl::scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE; + return op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE + || op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE + || op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE + || op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE; } template struct type_to_string; @@ -434,9 +113,9 @@ private: std::ostringstream oss; }; -inline bool node_leaf(viennacl::scheduler::op_element const & op) +inline bool node_leaf(scheduler::op_element const & op) { - using namespace viennacl::scheduler; + using namespace scheduler; return op.type==OPERATION_UNARY_NORM_1_TYPE || op.type==OPERATION_UNARY_NORM_2_TYPE || op.type==OPERATION_UNARY_NORM_INF_TYPE @@ -453,9 +132,9 @@ inline bool node_leaf(viennacl::scheduler::op_element const & op) || op.type_family==OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY; } -inline bool elementwise_operator(viennacl::scheduler::op_element const & op) +inline bool elementwise_operator(scheduler::op_element const & op) { - using namespace viennacl::scheduler; + using namespace scheduler; return op.type== OPERATION_BINARY_ASSIGN_TYPE || op.type== OPERATION_BINARY_INPLACE_ADD_TYPE || op.type== OPERATION_BINARY_INPLACE_SUB_TYPE @@ -467,9 +146,9 @@ inline bool elementwise_operator(viennacl::scheduler::op_element const & op) || op.type== OPERATION_BINARY_DIV_TYPE; } -inline bool elementwise_function(viennacl::scheduler::op_element const & op) +inline bool elementwise_function(scheduler::op_element const & op) { - using namespace viennacl::scheduler; + using namespace scheduler; return op.type == OPERATION_UNARY_CAST_CHAR_TYPE @@ -516,37 +195,13 @@ inline bool elementwise_function(viennacl::scheduler::op_element const & op) } -inline viennacl::scheduler::lhs_rhs_element & lhs_rhs_element(viennacl::scheduler::statement const & st, atidlas_int_t idx, leaf_t leaf) +inline scheduler::lhs_rhs_element & lhs_rhs_element(scheduler::statement const & st, atidlas_int_t idx, leaf_t leaf) { using namespace tools; assert(leaf==LHS_NODE_TYPE || leaf==RHS_NODE_TYPE); if (leaf==LHS_NODE_TYPE) - return const_cast(st.array()[idx].lhs); - return const_cast(st.array()[idx].rhs); -} - -inline unsigned int size_of(viennacl::scheduler::statement_node_numeric_type type) -{ - using namespace viennacl::scheduler; - switch (type) - { - case UCHAR_TYPE: - case CHAR_TYPE: return 1; - - case USHORT_TYPE: - case SHORT_TYPE: - case HALF_TYPE: return 2; - - case UINT_TYPE: - case INT_TYPE: - case FLOAT_TYPE: return 4; - - case ULONG_TYPE: - case LONG_TYPE: - case DOUBLE_TYPE: return 8; - - default: throw generator_not_supported_exception("Unsupported scalartype"); - } + return const_cast(st.array()[idx].lhs); + return const_cast(st.array()[idx].rhs); } inline std::string append_width(std::string const & str, unsigned int width) @@ -584,6 +239,20 @@ private: typedef create_map > create_process_accessors; typedef create_map > create_evaluate_accessors; +template +class make_vector { +public: + typedef make_vector my_type; + my_type& operator<< (const T& val) { + data_.push_back(val); + return *this; + } + operator std::vector() const { + return data_; + } +private: + std::vector data_; +}; } } diff --git a/atidlas/backend/tools/tree_parsing.hpp b/atidlas/backend/tools/tree_parsing.hpp index 9ba03a37e..050dc61b5 100644 --- a/atidlas/backend/tools/tree_parsing.hpp +++ b/atidlas/backend/tools/tree_parsing.hpp @@ -5,9 +5,7 @@ #include #include "CL/cl.h" -#include "viennacl/forwards.h" -#include "viennacl/scheduler/forwards.h" - +#include "atidlas/scheduler/forwards.h" #include "atidlas/backend/mapped_objects.hpp" #include "atidlas/backend/tools/misc.hpp" #include "atidlas/forwards.h" @@ -22,15 +20,15 @@ namespace tools class traversal_functor { public: - void call_before_expansion(viennacl::scheduler::statement const &, atidlas_int_t) const { } - void call_after_expansion(viennacl::scheduler::statement const &, atidlas_int_t) const { } + void call_before_expansion(scheduler::statement const &, atidlas_int_t) const { } + void call_after_expansion(scheduler::statement const &, atidlas_int_t) const { } }; /** @brief Recursively execute a functor on a statement */ template -inline void traverse(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, Fun const & fun, bool inspect) +inline void traverse(scheduler::statement const & statement, atidlas_int_t root_idx, Fun const & fun, bool inspect) { - viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx]; + scheduler::statement_node const & root_node = statement.array()[root_idx]; bool recurse = tools::node_leaf(root_node.op)?inspect:true; fun.call_before_expansion(statement, root_idx); @@ -38,9 +36,9 @@ inline void traverse(viennacl::scheduler::statement const & statement, atidlas_i //Lhs: if (recurse) { - if (root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) + if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) traverse(statement, root_node.lhs.node_index, fun, inspect); - if (root_node.lhs.type_family != viennacl::scheduler::INVALID_TYPE_FAMILY) + if (root_node.lhs.type_family != scheduler::INVALID_TYPE_FAMILY) fun(statement, root_idx, LHS_NODE_TYPE); } @@ -48,11 +46,11 @@ inline void traverse(viennacl::scheduler::statement const & statement, atidlas_i fun(statement, root_idx, PARENT_NODE_TYPE); //Rhs: - if (recurse && root_node.rhs.type_family!=viennacl::scheduler::INVALID_TYPE_FAMILY) + if (recurse && root_node.rhs.type_family!=scheduler::INVALID_TYPE_FAMILY) { - if (root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) + if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) traverse(statement, root_node.rhs.node_index, fun, inspect); - if (root_node.rhs.type_family != viennacl::scheduler::INVALID_TYPE_FAMILY) + if (root_node.rhs.type_family != scheduler::INVALID_TYPE_FAMILY) fun(statement, root_idx, RHS_NODE_TYPE); } @@ -62,13 +60,13 @@ inline void traverse(viennacl::scheduler::statement const & statement, atidlas_i class filter_fun : public traversal_functor { public: - typedef bool (*pred_t)(viennacl::scheduler::statement_node const & node); + typedef bool (*pred_t)(scheduler::statement_node const & node); filter_fun(pred_t pred, std::vector & out) : pred_(pred), out_(out){ } - void operator()(viennacl::scheduler::statement const & statement, size_t root_idx, leaf_t) const + void operator()(scheduler::statement const & statement, size_t root_idx, leaf_t) const { - viennacl::scheduler::statement_node const * root_node = &statement.array()[root_idx]; + scheduler::statement_node const * root_node = &statement.array()[root_idx]; if (pred_(*root_node)) out_.push_back(root_idx); } @@ -77,7 +75,7 @@ private: std::vector & out_; }; -inline std::vector filter_nodes(bool (*pred)(viennacl::scheduler::statement_node const & node), viennacl::scheduler::statement const & statement, bool inspect) +inline std::vector filter_nodes(bool (*pred)(scheduler::statement_node const & node), scheduler::statement const & statement, bool inspect) { std::vector res; tools::traverse(statement, statement.root(), filter_fun(pred, res), inspect); @@ -87,32 +85,32 @@ inline std::vector filter_nodes(bool (*pred)(viennacl::scheduler::statem class filter_elements_fun : public traversal_functor { public: - filter_elements_fun(viennacl::scheduler::statement_node_subtype subtype, std::vector & out) : subtype_(subtype), out_(out) { } + filter_elements_fun(scheduler::statement_node_subtype subtype, std::vector & out) : subtype_(subtype), out_(out) { } - void operator()(viennacl::scheduler::statement const & statement, size_t root_idx, leaf_t) const + void operator()(scheduler::statement const & statement, size_t root_idx, leaf_t) const { - viennacl::scheduler::statement_node const * root_node = &statement.array()[root_idx]; + scheduler::statement_node const * root_node = &statement.array()[root_idx]; if (root_node->lhs.subtype==subtype_) out_.push_back(root_node->lhs); if (root_node->rhs.subtype==subtype_) out_.push_back(root_node->rhs); } private: - viennacl::scheduler::statement_node_subtype subtype_; - std::vector & out_; + scheduler::statement_node_subtype subtype_; + std::vector & out_; }; -inline std::vector filter_elements(viennacl::scheduler::statement_node_subtype subtype, viennacl::scheduler::statement const & statement) +inline std::vector filter_elements(scheduler::statement_node_subtype subtype, scheduler::statement const & statement) { - std::vector res; + std::vector res; tools::traverse(statement, statement.root(), filter_elements_fun(subtype, res), true); return res; } /** @brief generate a string from an operation_node_type */ -inline const char * evaluate(viennacl::scheduler::operation_node_type type) +inline const char * evaluate(scheduler::operation_node_type type) { - using namespace viennacl::scheduler; + using namespace scheduler; // unary expression switch (type) { @@ -194,9 +192,9 @@ inline const char * evaluate(viennacl::scheduler::operation_node_type type) } } -inline const char * operator_string(viennacl::scheduler::operation_node_type type) +inline const char * operator_string(scheduler::operation_node_type type) { - using namespace viennacl::scheduler; + using namespace scheduler; switch (type) { case OPERATION_UNARY_CAST_CHAR_TYPE : return "char"; @@ -237,24 +235,24 @@ private: public: evaluate_expression_traversal(std::map const & accessors, std::string & str, mapping_type const & mapping) : accessors_(accessors), str_(str), mapping_(mapping){ } - void call_before_expansion(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx) const + void call_before_expansion(scheduler::statement const & statement, atidlas_int_t root_idx) const { - viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx]; - if ((root_node.op.type_family==viennacl::scheduler::OPERATION_UNARY_TYPE_FAMILY || tools::elementwise_function(root_node.op)) + scheduler::statement_node const & root_node = statement.array()[root_idx]; + if ((root_node.op.type_family==scheduler::OPERATION_UNARY_TYPE_FAMILY || tools::elementwise_function(root_node.op)) && !tools::node_leaf(root_node.op)) str_+=tools::evaluate(root_node.op.type); str_+="("; } - void call_after_expansion(viennacl::scheduler::statement const & /*statement*/, atidlas_int_t /*root_idx*/) const + void call_after_expansion(scheduler::statement const & /*statement*/, atidlas_int_t /*root_idx*/) const { str_+=")"; } - void operator()(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf) const + void operator()(scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf) const { - viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx]; + scheduler::statement_node const & root_node = statement.array()[root_idx]; mapping_type::key_type key = std::make_pair(root_idx, leaf); if (leaf==PARENT_NODE_TYPE) { @@ -262,20 +260,20 @@ public: str_ += mapping_.at(key)->evaluate(accessors_); else if (tools::elementwise_operator(root_node.op)) str_ += tools::evaluate(root_node.op.type); - else if (root_node.op.type_family!=viennacl::scheduler::OPERATION_UNARY_TYPE_FAMILY && tools::elementwise_function(root_node.op)) + else if (root_node.op.type_family!=scheduler::OPERATION_UNARY_TYPE_FAMILY && tools::elementwise_function(root_node.op)) str_ += ","; } else { if (leaf==LHS_NODE_TYPE) { - if (root_node.lhs.type_family!=viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) + if (root_node.lhs.type_family!=scheduler::COMPOSITE_OPERATION_FAMILY) str_ += mapping_.at(key)->evaluate(accessors_); } if (leaf==RHS_NODE_TYPE) { - if (root_node.rhs.type_family!=viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) + if (root_node.rhs.type_family!=scheduler::COMPOSITE_OPERATION_FAMILY) str_ += mapping_.at(key)->evaluate(accessors_); } } @@ -283,22 +281,22 @@ public: }; inline std::string evaluate(leaf_t leaf, std::map const & accessors, - viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, mapping_type const & mapping) + scheduler::statement const & statement, atidlas_int_t root_idx, mapping_type const & mapping) { std::string res; evaluate_expression_traversal traversal_functor(accessors, res, mapping); - viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx]; + scheduler::statement_node const & root_node = statement.array()[root_idx]; if (leaf==RHS_NODE_TYPE) { - if (root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) + if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) tools::traverse(statement, root_node.rhs.node_index, traversal_functor, false); else traversal_functor(statement, root_idx, leaf); } else if (leaf==LHS_NODE_TYPE) { - if (root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) + if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) tools::traverse(statement, root_node.lhs.node_index, traversal_functor, false); else traversal_functor(statement, root_idx, leaf); @@ -310,9 +308,9 @@ inline std::string evaluate(leaf_t leaf, std::map cons } inline void evaluate(tools::kernel_generation_stream & stream, leaf_t leaf, std::map const & accessors, - statements_container const & statements, std::vector const & mappings) + scheduler::statements_container const & statements, std::vector const & mappings) { - statements_container::data_type::const_iterator sit; + scheduler::statements_container::data_type::const_iterator sit; std::vector::const_iterator mit; for (mit = mappings.begin(), sit = statements.data().begin(); sit != statements.data().end(); ++mit, ++sit) @@ -327,7 +325,7 @@ public: process_traversal(std::multimap const & accessors, tools::kernel_generation_stream & stream, mapping_type const & mapping, std::set & already_processed) : accessors_(accessors), stream_(stream), mapping_(mapping), already_processed_(already_processed){ } - void operator()(viennacl::scheduler::statement const & /*statement*/, atidlas_int_t root_idx, leaf_t leaf) const + void operator()(scheduler::statement const & /*statement*/, atidlas_int_t root_idx, leaf_t leaf) const { mapping_type::const_iterator it = mapping_.find(std::make_pair(root_idx, leaf)); if (it!=mapping_.end()) @@ -353,21 +351,21 @@ private: }; inline void process(tools::kernel_generation_stream & stream, leaf_t leaf, std::multimap const & accessors, - viennacl::scheduler::statement const & statement, size_t root_idx, mapping_type const & mapping, std::set & already_processed) + scheduler::statement const & statement, size_t root_idx, mapping_type const & mapping, std::set & already_processed) { process_traversal traversal_functor(accessors, stream, mapping, already_processed); - viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx]; + scheduler::statement_node const & root_node = statement.array()[root_idx]; if (leaf==RHS_NODE_TYPE) { - if (root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) + if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) tools::traverse(statement, root_node.rhs.node_index, traversal_functor, true); else traversal_functor(statement, root_idx, leaf); } else if (leaf==LHS_NODE_TYPE) { - if (root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) + if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) tools::traverse(statement, root_node.lhs.node_index, traversal_functor, true); else traversal_functor(statement, root_idx, leaf); @@ -379,9 +377,9 @@ inline void process(tools::kernel_generation_stream & stream, leaf_t leaf, std:: } inline void process(tools::kernel_generation_stream & stream, leaf_t leaf, std::multimap const & accessors, - statements_container const & statements, std::vector const & mappings) + scheduler::statements_container const & statements, std::vector const & mappings) { - statements_container::data_type::const_iterator sit; + scheduler::statements_container::data_type::const_iterator sit; std::vector::const_iterator mit; std::set already_processed; @@ -404,78 +402,83 @@ private: } } -public: - typedef void result_type; + /** @brief Vector mapping */ + inline void append(atidlas::vector_base const & vec) const + { + *ptr_++='v'; //vector + *ptr_++=(char)vec.dtype(); + append_id(ptr_, binder_.get(&vec.data())); + } + inline void append(scheduler::lhs_rhs_element const & lhs_rhs) const + { + if(lhs_rhs.subtype==scheduler::DENSE_VECTOR_TYPE) + append(*lhs_rhs.vector); + } + +public: statement_representation_functor(symbolic_binder & binder, char *& ptr) : binder_(binder), ptr_(ptr){ } - template - inline result_type operator()(NumericT const & /*scal*/) const - { - *ptr_++='h'; //host - *ptr_++='s'; //scalar - *ptr_++=tools::first_letter_of_type::value(); - } +// template +// inline result_type operator()(NumericT const & /*scal*/) const +// { +// *ptr_++='h'; //host +// *ptr_++='s'; //scalar +// *ptr_++=tools::first_letter_of_type::value(); +// } - /** @brief Scalar mapping */ - template - inline result_type operator()(viennacl::scalar const & scal) const - { - *ptr_++='s'; //scalar - *ptr_++=tools::first_letter_of_type::value(); - append_id(ptr_, binder_.get(&viennacl::traits::handle(scal))); - } +// /** @brief Scalar mapping */ +// template +// inline result_type operator()(viennacl::scalar const & scal) const +// { +// *ptr_++='s'; //scalar +// *ptr_++=tools::first_letter_of_type::value(); +// append_id(ptr_, binder_.get(&viennacl::traits::handle(scal))); +// } - /** @brief Vector mapping */ - template - inline result_type operator()(viennacl::vector_base const & vec) const - { - *ptr_++='v'; //vector - *ptr_++=tools::first_letter_of_type::value(); - append_id(ptr_, binder_.get(&viennacl::traits::handle(vec))); - } - /** @brief Implicit vector mapping */ - template - inline result_type operator()(viennacl::implicit_vector_base const & /*vec*/) const - { - *ptr_++='i'; //implicit - *ptr_++='v'; //vector - *ptr_++=tools::first_letter_of_type::value(); - } - /** @brief Matrix mapping */ - template - inline result_type operator()(viennacl::matrix_base const & mat) const - { - *ptr_++='m'; //Matrix - *ptr_++=tools::first_letter_of_type::value(); - append_id(ptr_, binder_.get(&viennacl::traits::handle(mat))); - } +// /** @brief Implicit vector mapping */ +// template +// inline result_type operator()(viennacl::implicit_vector_base const & /*vec*/) const +// { +// *ptr_++='i'; //implicit +// *ptr_++='v'; //vector +// *ptr_++=tools::first_letter_of_type::value(); +// } - /** @brief Implicit matrix mapping */ - template - inline result_type operator()(viennacl::implicit_matrix_base const & /*mat*/) const - { - *ptr_++='i'; //implicit - *ptr_++='m'; //matrix - *ptr_++=tools::first_letter_of_type::value(); - } +// /** @brief Matrix mapping */ +// template +// inline result_type operator()(viennacl::matrix_base const & mat) const +// { +// *ptr_++='m'; //Matrix +// *ptr_++=tools::first_letter_of_type::value(); +// append_id(ptr_, binder_.get(&viennacl::traits::handle(mat))); +// } - static inline void append(char*& p, const char * str) +// /** @brief Implicit matrix mapping */ +// template +// inline result_type operator()(viennacl::implicit_matrix_base const & /*mat*/) const +// { +// *ptr_++='i'; //implicit +// *ptr_++='m'; //matrix +// *ptr_++=tools::first_letter_of_type::value(); +// } + + inline void append(char*& p, const char * str) const { std::size_t n = std::strlen(str); std::memcpy(p, str, n); p+=n; } - inline void operator()(viennacl::scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const + inline void operator()(scheduler::statement const & statement, atidlas_int_t root_idx, leaf_t leaf_t) const { - viennacl::scheduler::statement_node const & root_node = statement.array()[root_idx]; - if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) - tools::call_on_element(root_node.lhs, *this); - else if (root_node.op.type_family==viennacl::scheduler::OPERATION_BINARY_TYPE_FAMILY && leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) - tools::call_on_element(root_node.rhs, *this); + scheduler::statement_node const & root_node = statement.array()[root_idx]; + if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY) + append(root_node.lhs); + else if (root_node.op.type_family==scheduler::OPERATION_BINARY_TYPE_FAMILY && leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY) + append(root_node.rhs); else if (leaf_t==PARENT_NODE_TYPE) append_id(ptr_,root_node.op.type); } @@ -485,16 +488,16 @@ private: char *& ptr_; }; -inline std::string statements_representation(statements_container const & statements, binding_policy_t binding_policy) +inline std::string statements_representation(scheduler::statements_container const & statements, binding_policy_t binding_policy) { std::vector program_name_vector(256); char* program_name = program_name_vector.data(); - if (statements.order()==statements_container::INDEPENDENT) + if (statements.order()==scheduler::statements_container::INDEPENDENT) *program_name++='i'; else *program_name++='s'; tools::shared_ptr binder = make_binder(binding_policy); - for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it) + for (scheduler::statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it) tools::traverse(*it, it->root(), tools::statement_representation_functor(*binder, program_name),true); *program_name='\0'; return std::string(program_name_vector.data()); diff --git a/atidlas/detail/vector_def.hpp b/atidlas/detail/vector_def.hpp new file mode 100644 index 000000000..c403b850f --- /dev/null +++ b/atidlas/detail/vector_def.hpp @@ -0,0 +1,17 @@ +#ifndef ATIDLAS_VECTOR_HPP_ +#define ATIDLAS_VECTOR_HPP_ + +#include "CL/cl.hpp" + +#include "atidlas/forwards.h" + + +namespace atidlas +{ + + + + +} + +#endif diff --git a/atidlas/expression_template.hpp b/atidlas/expression_template.hpp new file mode 100644 index 000000000..2bce8efb1 --- /dev/null +++ b/atidlas/expression_template.hpp @@ -0,0 +1,80 @@ +#ifndef ATIDLAS_EXPRESSION_TEMPLATE_HPP_ +#define ATIDLAS_EXPRESSION_TEMPLATE_HPP_ + +#include "atidlas/forwards.h" +#include "atidlas/traits/size.hpp" + +namespace atidlas +{ + +namespace detail +{ + +template +struct reference_if_nonscalar +{ + typedef T & type; +}; + +#define ATIDLAS_REFERENCE_IF_NONSCALAR(TNAME) \ +template<> struct reference_if_nonscalar { typedef TNAME type; }; \ +template<> struct reference_if_nonscalar { typedef const TNAME type; }; + + ATIDLAS_REFERENCE_IF_NONSCALAR(char) + ATIDLAS_REFERENCE_IF_NONSCALAR(short) + ATIDLAS_REFERENCE_IF_NONSCALAR(int) + ATIDLAS_REFERENCE_IF_NONSCALAR(long) + ATIDLAS_REFERENCE_IF_NONSCALAR(unsigned char) + ATIDLAS_REFERENCE_IF_NONSCALAR(unsigned short) + ATIDLAS_REFERENCE_IF_NONSCALAR(unsigned int) + ATIDLAS_REFERENCE_IF_NONSCALAR(unsigned long) + + ATIDLAS_REFERENCE_IF_NONSCALAR(float) + ATIDLAS_REFERENCE_IF_NONSCALAR(double) +#undef ATIDLAS_REFERENCE_IF_NONSCALAR + +} + +/** @brief An expression template class that represents a binary operation +* @tparam LHS left hand side operand +* @tparam RHS right hand side operand +* @tparam OP the operator +*/ +template +class expression_template +{ + typedef typename detail::reference_if_nonscalar::type lhs_reference_type; + typedef typename detail::reference_if_nonscalar::type rhs_reference_type; +public: + expression_template(LHS & l, RHS & r) : lhs_(l), rhs_(r) {} + /** @brief Get left hand side operand */ + lhs_reference_type lhs() const { return lhs_; } + /** @brief Get right hand side operand */ + rhs_reference_type rhs() const { return rhs_; } + /** @brief Returns the size of the result vector */ + atidlas_int_t size() const { return traits::size(*this); } +private: + /** @brief The left hand side operand */ + lhs_reference_type lhs_; + /** @brief The right hand side operand */ + rhs_reference_type rhs_; +}; + +template +struct vector_expression: public expression_template{ + vector_expression(LHS & l, RHS & r) : expression_template(l, r){ } +}; + +template +class matrix_expression: public expression_template{ + matrix_expression(LHS & l, RHS & r) : expression_template(l, r){ } +}; + +template +class scalar_expression: public expression_template{ + scalar_expression(LHS & l, RHS & r) : expression_template(l, r){ } +}; + +} + +#endif diff --git a/atidlas/forwards.h b/atidlas/forwards.h index 19ca454f1..f4bde9bab 100644 --- a/atidlas/forwards.h +++ b/atidlas/forwards.h @@ -1,10 +1,285 @@ #ifndef ATIDLAS_FORWARDS_H #define ATIDLAS_FORWARDS_H +#include "CL/cl.hpp" + namespace atidlas { typedef int atidlas_int_t; +enum numeric_type +{ + INVALID_NUMERIC_TYPE = 0, + CHAR_TYPE, + UCHAR_TYPE, + SHORT_TYPE, + USHORT_TYPE, + INT_TYPE, + UINT_TYPE, + LONG_TYPE, + ULONG_TYPE, + HALF_TYPE, + FLOAT_TYPE, + DOUBLE_TYPE +}; + +inline unsigned int size_of(numeric_type type) +{ + switch (type) + { + case UCHAR_TYPE: + case CHAR_TYPE: return 1; + + case USHORT_TYPE: + case SHORT_TYPE: + case HALF_TYPE: return 2; + + case UINT_TYPE: + case INT_TYPE: + case FLOAT_TYPE: return 4; + + case ULONG_TYPE: + case LONG_TYPE: + case DOUBLE_TYPE: return 8; + + default: throw "Unsupported numeric type"; + } +} + +template +class vector_expression; + +class vector_base +{ +public: + vector_base(atidlas_int_t size, numeric_type dtype, cl::Context context) : dtype_(dtype), size_(size), internal_size_(size), start_(0), stride_(1), context_(context), data_(context, CL_MEM_READ_WRITE, size_of(dtype_)*internal_size()) {} + vector_base(cl::Buffer data, atidlas_int_t size, numeric_type dtype, atidlas_int_t start, atidlas_int_t stride): dtype_(dtype), size_(size), internal_size_(size), start_(start), stride_(stride), context_(data.getInfo()), data_(data){ } + + numeric_type dtype() const { return dtype_; } + atidlas_int_t size() const { return size_; } + atidlas_int_t internal_size() const { return internal_size_; } + atidlas_int_t start() const { return start_; } + atidlas_int_t stride() const { return stride_; } + + template + vector_base& operator=(vector_expression const &); + + cl::Context const & context() const { return context_; } + cl::Buffer const & data() const { return data_; } + + +private: + numeric_type dtype_; + + atidlas_int_t size_; + atidlas_int_t internal_size_; + atidlas_int_t start_; + atidlas_int_t stride_; + + cl::Context context_; + cl::Buffer data_; +}; + +class matrix_base +{ +public: + matrix_base(atidlas_int_t size1, atidlas_int_t size2, numeric_type dtype, cl::Context context) : dtype_(dtype), size1_(size1), internal_size1_(size1), start1_(0), stride1_(1), + size2_(size2), internal_size2_(size2), start2_(0), stride2_(2), + context_(context), data_(context, CL_MEM_READ_WRITE, size_of(dtype_)*internal_size()) {} + matrix_base(cl::Buffer data, atidlas_int_t size1, atidlas_int_t start1, atidlas_int_t stride1, + atidlas_int_t size2, atidlas_int_t start2, atidlas_int_t stride2, + numeric_type dtype): dtype_(dtype), size1_(size1), start1_(start1), stride1_(stride1), + size2_(size2), start2_(start2), stride2_(stride2), context_(data.getInfo()), data_(data){ } + + numeric_type dtype() const { return dtype_; } + + atidlas_int_t size1() const { return size1_; } + atidlas_int_t internal_size1() const { return size1_; } + atidlas_int_t start1() const { return start1_; } + atidlas_int_t stride1() const { return stride1_; } + + atidlas_int_t size2() const { return size2_; } + atidlas_int_t internal_size2() const { return size2_; } + atidlas_int_t start2() const { return start2_; } + atidlas_int_t stride2() const { return stride2_; } + + atidlas_int_t internal_size() const { return internal_size1_*internal_size2_; } + + cl::Context const & context() const { return context_; } + cl::Buffer const & data() const { return data_; } + + +private: + numeric_type dtype_; + + atidlas_int_t size1_; + atidlas_int_t internal_size1_; + atidlas_int_t start1_; + atidlas_int_t stride1_; + + atidlas_int_t size2_; + atidlas_int_t internal_size2_; + atidlas_int_t start2_; + atidlas_int_t stride2_; + + cl::Context context_; + cl::Buffer data_; +}; + +/** @brief A tag class representing assignment */ +struct op_assign {}; +/** @brief A tag class representing inplace addition */ +struct op_inplace_add {}; +/** @brief A tag class representing inplace subtraction */ +struct op_inplace_sub {}; + +/** @brief A tag class representing addition */ +struct op_add {}; +/** @brief A tag class representing subtraction */ +struct op_sub {}; +/** @brief A tag class representing multiplication by a scalar */ +struct op_mult {}; +/** @brief A tag class representing matrix-vector products and element-wise multiplications*/ +struct op_prod {}; +/** @brief A tag class representing matrix-matrix products */ +struct op_mat_mat_prod {}; +/** @brief A tag class representing division */ +struct op_div {}; +/** @brief A tag class representing the power function */ +struct op_pow {}; + +/** @brief A tag class representing equality */ +struct op_eq {}; +/** @brief A tag class representing inequality */ +struct op_neq {}; +/** @brief A tag class representing greater-than */ +struct op_greater {}; +/** @brief A tag class representing less-than */ +struct op_less {}; +/** @brief A tag class representing greater-than-or-equal-to */ +struct op_geq {}; +/** @brief A tag class representing less-than-or-equal-to */ +struct op_leq {}; + +template +struct op_reduce_vector{ }; + +template +struct op_reduce_rows{ }; + +template +struct op_reduce_columns{ }; + +/** @brief A tag class representing element-wise casting operations on vectors and matrices */ +template +struct op_element_cast {}; + +/** @brief A tag class representing element-wise binary operations (like multiplication) on vectors or matrices */ +template +struct op_element_binary {}; + +/** @brief A tag class representing element-wise unary operations (like sin()) on vectors or matrices */ +template +struct op_element_unary {}; + +/** @brief A tag class representing the modulus function for integers */ +struct op_abs {}; +/** @brief A tag class representing the acos() function */ +struct op_acos {}; +/** @brief A tag class representing the asin() function */ +struct op_asin {}; +/** @brief A tag class for representing the argmax() function */ +struct op_argmax {}; +/** @brief A tag class for representing the argmin() function */ +struct op_argmin {}; +/** @brief A tag class representing the atan() function */ +struct op_atan {}; +/** @brief A tag class representing the atan2() function */ +struct op_atan2 {}; +/** @brief A tag class representing the ceil() function */ +struct op_ceil {}; +/** @brief A tag class representing the cos() function */ +struct op_cos {}; +/** @brief A tag class representing the cosh() function */ +struct op_cosh {}; +/** @brief A tag class representing the exp() function */ +struct op_exp {}; +/** @brief A tag class representing the fabs() function */ +struct op_fabs {}; +/** @brief A tag class representing the fdim() function */ +struct op_fdim {}; +/** @brief A tag class representing the floor() function */ +struct op_floor {}; +/** @brief A tag class representing the fmax() function */ +struct op_fmax {}; +/** @brief A tag class representing the fmin() function */ +struct op_fmin {}; +/** @brief A tag class representing the fmod() function */ +struct op_fmod {}; +/** @brief A tag class representing the log() function */ +struct op_log {}; +/** @brief A tag class representing the log10() function */ +struct op_log10 {}; +/** @brief A tag class representing the sin() function */ +struct op_sin {}; +/** @brief A tag class representing the sinh() function */ +struct op_sinh {}; +/** @brief A tag class representing the sqrt() function */ +struct op_sqrt {}; +/** @brief A tag class representing the tan() function */ +struct op_tan {}; +/** @brief A tag class representing the tanh() function */ +struct op_tanh {}; + +/** @brief A tag class representing the (off-)diagonal of a matrix */ +struct op_matrix_diag {}; + +/** @brief A tag class representing a matrix given by a vector placed on a certain (off-)diagonal */ +struct op_vector_diag {}; + +/** @brief A tag class representing the extraction of a matrix row to a vector */ +struct op_row {}; + +/** @brief A tag class representing the extraction of a matrix column to a vector */ +struct op_column {}; + +/** @brief A tag class representing inner products of two vectors */ +struct op_inner_prod {}; + +/** @brief A tag class representing the 1-norm of a vector */ +struct op_norm_1 {}; + +/** @brief A tag class representing the 2-norm of a vector */ +struct op_norm_2 {}; + +/** @brief A tag class representing the inf-norm of a vector */ +struct op_norm_inf {}; + +/** @brief A tag class representing the maximum of a vector */ +struct op_max {}; + +/** @brief A tag class representing the minimum of a vector */ +struct op_min {}; + + +/** @brief A tag class representing the Frobenius-norm of a matrix */ +struct op_norm_frobenius {}; + +/** @brief A tag class representing transposed matrices */ +struct op_trans {}; + +/** @brief A tag class representing sign flips (for scalars only. Vectors and matrices use the standard multiplication by the scalar -1.0) */ +struct op_flip_sign {}; + +template +class vector_expression; + +template +class matrix_expression; + +template +class scalar_expression; + } #endif diff --git a/atidlas/model/model.hpp b/atidlas/model/model.hpp index f34b79af0..0537014ba 100644 --- a/atidlas/model/model.hpp +++ b/atidlas/model/model.hpp @@ -3,13 +3,11 @@ #include "rapidjson/document.h" -#include "viennacl/ocl/program.hpp" -#include "viennacl/tools/timer.hpp" - +#include "atidlas/backend/templates/template_base.hpp" #include "atidlas/model/tools.hpp" #include "atidlas/tools/shared_ptr.hpp" #include "atidlas/tools/lazy_program_compiler.hpp" -#include "atidlas/backend/templates/template_base.hpp" +#include "atidlas/tools/timer.hpp" namespace atidlas { @@ -107,7 +105,7 @@ namespace atidlas templates_(1,tp.clone()), context_(context), device_(device) {} - void execute(statements_container const & statements, bool bypass_predictor = false, bool force_recompilation = false) + void execute(scheduler::statements_container const & statements, bool bypass_predictor = false, bool force_recompilation = false) { bypass_predictor = bypass_predictor || predictor_.get()==NULL; @@ -147,11 +145,11 @@ namespace atidlas templates_[label]->enqueue("k" + tools::to_string(label), lazy_programs_, statements); } - void tune(statements_container const & statements) + void tune(scheduler::statements_container const & statements) { //Collect the timings std::vector timings(templates_.size()); - viennacl::tools::timer timer; + tools::timer timer; for(size_t i = 0 ; i < templates_.size() ; ++i) { timer.start(); diff --git a/atidlas/scheduler/forwards.h b/atidlas/scheduler/forwards.h new file mode 100644 index 000000000..65578d78f --- /dev/null +++ b/atidlas/scheduler/forwards.h @@ -0,0 +1,424 @@ +#ifndef ATIDLAS_SCHEDULER_STATEMENT_HPP +#define ATIDLAS_SCHEDULER_STATEMENT_HPP + +#include "atidlas/forwards.h" +#include "atidlas/tools/predicate.hpp" +#include "atidlas/tools/enable_if.hpp" + +#include +#include + +namespace atidlas +{ +namespace scheduler +{ + +/** @brief Optimization enum for grouping operations into unary or binary operations. Just for optimization of lookups. */ +enum operation_node_type_family +{ + OPERATION_INVALID_TYPE_FAMILY = 0, + + // unary or binary expression + OPERATION_UNARY_TYPE_FAMILY, + OPERATION_BINARY_TYPE_FAMILY, + + //reductions + OPERATION_VECTOR_REDUCTION_TYPE_FAMILY, + OPERATION_ROWS_REDUCTION_TYPE_FAMILY, + OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY +}; + +/** @brief Enumeration for identifying the possible operations */ +enum operation_node_type +{ + OPERATION_INVALID_TYPE = 0, + + // unary operator + OPERATION_UNARY_MINUS_TYPE, + + // unary expression + OPERATION_UNARY_CAST_CHAR_TYPE, + OPERATION_UNARY_CAST_UCHAR_TYPE, + OPERATION_UNARY_CAST_SHORT_TYPE, + OPERATION_UNARY_CAST_USHORT_TYPE, + OPERATION_UNARY_CAST_INT_TYPE, + OPERATION_UNARY_CAST_UINT_TYPE, + OPERATION_UNARY_CAST_LONG_TYPE, + OPERATION_UNARY_CAST_ULONG_TYPE, + OPERATION_UNARY_CAST_HALF_TYPE, + OPERATION_UNARY_CAST_FLOAT_TYPE, + OPERATION_UNARY_CAST_DOUBLE_TYPE, + + OPERATION_UNARY_ABS_TYPE, + OPERATION_UNARY_ACOS_TYPE, + OPERATION_UNARY_ASIN_TYPE, + OPERATION_UNARY_ATAN_TYPE, + OPERATION_UNARY_CEIL_TYPE, + OPERATION_UNARY_COS_TYPE, + OPERATION_UNARY_COSH_TYPE, + OPERATION_UNARY_EXP_TYPE, + OPERATION_UNARY_FABS_TYPE, + OPERATION_UNARY_FLOOR_TYPE, + OPERATION_UNARY_LOG_TYPE, + OPERATION_UNARY_LOG10_TYPE, + OPERATION_UNARY_SIN_TYPE, + OPERATION_UNARY_SINH_TYPE, + OPERATION_UNARY_SQRT_TYPE, + OPERATION_UNARY_TAN_TYPE, + OPERATION_UNARY_TANH_TYPE, + + OPERATION_UNARY_TRANS_TYPE, + OPERATION_UNARY_NORM_1_TYPE, + OPERATION_UNARY_NORM_2_TYPE, + OPERATION_UNARY_NORM_INF_TYPE, + OPERATION_UNARY_MAX_TYPE, + OPERATION_UNARY_MIN_TYPE, + + // binary expression + OPERATION_BINARY_ACCESS_TYPE, + OPERATION_BINARY_ASSIGN_TYPE, + OPERATION_BINARY_INPLACE_ADD_TYPE, + OPERATION_BINARY_INPLACE_SUB_TYPE, + OPERATION_BINARY_ADD_TYPE, + OPERATION_BINARY_SUB_TYPE, + OPERATION_BINARY_MULT_TYPE, + OPERATION_BINARY_DIV_TYPE, + OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE, + OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE, + OPERATION_BINARY_ELEMENT_ARGMAX_TYPE, + OPERATION_BINARY_ELEMENT_ARGMIN_TYPE, + OPERATION_BINARY_ELEMENT_PROD_TYPE, + OPERATION_BINARY_ELEMENT_DIV_TYPE, + OPERATION_BINARY_ELEMENT_EQ_TYPE, + OPERATION_BINARY_ELEMENT_NEQ_TYPE, + OPERATION_BINARY_ELEMENT_GREATER_TYPE, + OPERATION_BINARY_ELEMENT_GEQ_TYPE, + OPERATION_BINARY_ELEMENT_LESS_TYPE, + OPERATION_BINARY_ELEMENT_LEQ_TYPE, + OPERATION_BINARY_ELEMENT_POW_TYPE, + OPERATION_BINARY_ELEMENT_FMAX_TYPE, + OPERATION_BINARY_ELEMENT_FMIN_TYPE, + OPERATION_BINARY_ELEMENT_MAX_TYPE, + OPERATION_BINARY_ELEMENT_MIN_TYPE, + + OPERATION_BINARY_MATRIX_DIAG_TYPE, + OPERATION_BINARY_VECTOR_DIAG_TYPE, + OPERATION_BINARY_MATRIX_ROW_TYPE, + OPERATION_BINARY_MATRIX_COLUMN_TYPE, + OPERATION_BINARY_MAT_VEC_PROD_TYPE, + OPERATION_BINARY_MAT_MAT_PROD_TYPE, + OPERATION_BINARY_INNER_PROD_TYPE + +}; + + + +namespace result_of +{ + template + struct op_type_info + { + typedef typename T::ERROR_UNKNOWN_OP_TYPE error_type; + }; + + // elementwise casts + template<> struct op_type_info > { enum { id = OPERATION_UNARY_CAST_CHAR_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_CAST_UCHAR_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_CAST_SHORT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_CAST_USHORT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_CAST_INT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_CAST_UINT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_CAST_LONG_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_CAST_ULONG_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_CAST_FLOAT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_CAST_DOUBLE_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + + // elementwise functions + template<> struct op_type_info > { enum { id = OPERATION_UNARY_ABS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_ACOS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_ASIN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_ATAN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_CEIL_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_COS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_COSH_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_EXP_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_FABS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_FLOOR_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_LOG_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_LOG10_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_SIN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_SINH_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_SQRT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_TAN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_UNARY_TANH_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + + template<> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_ARGMAX_TYPE , family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_ARGMIN_TYPE , family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_POW_TYPE , family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_EQ_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_NEQ_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_GREATER_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_LESS_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_GEQ_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_LEQ_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_FMAX_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_FMIN_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + + + //structurewise function + template<> struct op_type_info { enum { id = OPERATION_UNARY_NORM_1_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_UNARY_NORM_2_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_UNARY_NORM_INF_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_UNARY_MAX_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_UNARY_MIN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + + template<> struct op_type_info { enum { id = OPERATION_UNARY_TRANS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_BINARY_MATRIX_ROW_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_BINARY_MATRIX_COLUMN_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + + template<> struct op_type_info { enum { id = OPERATION_BINARY_MATRIX_DIAG_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_BINARY_VECTOR_DIAG_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + + template<> struct op_type_info { enum { id = OPERATION_BINARY_MAT_VEC_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_BINARY_MAT_MAT_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_BINARY_INNER_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + + template struct op_type_info > { enum { id = op_type_info::id, family = OPERATION_VECTOR_REDUCTION_TYPE_FAMILY}; }; + template struct op_type_info > { enum { id = op_type_info::id, family = OPERATION_ROWS_REDUCTION_TYPE_FAMILY}; }; + template struct op_type_info > { enum { id = op_type_info::id, family = OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY}; }; + + //elementwise operator + template<> struct op_type_info { enum { id = OPERATION_BINARY_ASSIGN_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_BINARY_INPLACE_ADD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_BINARY_INPLACE_SUB_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_BINARY_ADD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_BINARY_SUB_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_DIV_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_BINARY_MULT_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + template<> struct op_type_info { enum { id = OPERATION_BINARY_DIV_TYPE, family = OPERATION_BINARY_TYPE_FAMILY}; }; + + template<> struct op_type_info { enum { id = OPERATION_UNARY_MINUS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY}; }; + + + /** \endcond */ +} // namespace result_of + + + + + +/** @brief Groups the type of a node in the statement tree. Used for faster dispatching */ +enum statement_node_type_family +{ + INVALID_TYPE_FAMILY = 0, + // LHS or RHS are again an expression: + COMPOSITE_OPERATION_FAMILY, + // device scalars: + SCALAR_TYPE_FAMILY, + // vector: + VECTOR_TYPE_FAMILY, + // matrices: + MATRIX_TYPE_FAMILY +}; + +/** @brief Encodes the type of a node in the statement tree. */ +enum statement_node_subtype +{ + INVALID_SUBTYPE = 0, + + HOST_SCALAR_TYPE, + DEVICE_SCALAR_TYPE, + + DENSE_VECTOR_TYPE, + IMPLICIT_VECTOR_TYPE, + + DENSE_MATRIX_TYPE, + IMPLICIT_MATRIX_TYPE, +}; + +/** @brief A class representing the 'data' for the LHS or RHS operand of the respective node. + * + * If it represents a compound expression, the union holds the array index within the respective statement array. + * If it represents a object with data (vector, matrix, etc.) it holds the respective pointer (scalar, vector, matrix) or value (host scalar) + * + * The member 'type_family' is an optimization for quickly retrieving the 'type', which denotes the currently 'active' member in the union + */ +struct lhs_rhs_element +{ + statement_node_type_family type_family; + statement_node_subtype subtype; + numeric_type numeric_t; + + union + { + unsigned int node_index; + atidlas::vector_base * vector; + atidlas::matrix_base * matrix; + }; +}; + + +/** @brief Struct for holding the type family as well as the type of an operation (could be addition, subtraction, norm, etc.) */ +struct op_element +{ + operation_node_type_family type_family; + operation_node_type type; +}; + +/** @brief Main datastructure for an node in the statement tree */ +struct statement_node +{ + lhs_rhs_element lhs; + op_element op; + lhs_rhs_element rhs; +}; + +namespace result_of +{ + + template struct num_nodes { enum { value = 0 }; }; + template struct num_nodes< vector_expression > { enum { value = 1 + num_nodes::value + num_nodes::value + num_nodes::value }; }; + template struct num_nodes< const vector_expression > { enum { value = 1 + num_nodes::value + num_nodes::value + num_nodes::value }; }; + template struct num_nodes< matrix_expression > { enum { value = 1 + num_nodes::value + num_nodes::value + num_nodes::value }; }; + template struct num_nodes< const matrix_expression > { enum { value = 1 + num_nodes::value + num_nodes::value + num_nodes::value }; }; + template struct num_nodes< scalar_expression > { enum { value = 1 + num_nodes::value + num_nodes::value + num_nodes::value }; }; + template struct num_nodes< const scalar_expression > { enum { value = 1 + num_nodes::value + num_nodes::value + num_nodes::value }; }; + +} + +/** \brief The main class for representing a statement such as x = inner_prod(y,z); at runtime. + * + * This is the equivalent to an expression template tree, but entirely built at runtime in order to perform really cool stuff such as kernel fusion. + */ +class statement +{ +public: + typedef statement_node value_type; + typedef std::vector container_type; + + statement(container_type const & custom_array) : array_(custom_array) {} + + /** @brief Generate the runtime statement from an expression template. + * + * Constructing a runtime statement from expression templates makes perfect sense, because this way only a single allocation is needed when creating the statement. */ + template + statement(LHS & lhs, OP const &, RHS const & rhs) : array_(1 + result_of::num_nodes::value) + { + array_[0].op.type_family = operation_node_type_family(result_of::op_type_info::family); + array_[0].op.type = operation_node_type(result_of::op_type_info::id); + add_lhs(0, 1, lhs); + add_rhs(0, 1, rhs); + } + + container_type const & array() const { return array_; } + unsigned int root() const { return 0; } +private: + + //////////// Tree nodes (non-terminals) //////////////////// + + unsigned int add_element(unsigned int next_free, lhs_rhs_element & elem, vector_base const & x) + { + elem.type_family = VECTOR_TYPE_FAMILY; + elem.subtype = DENSE_VECTOR_TYPE; + elem.vector = const_cast(&x); + return next_free; + } + + template + unsigned int add_element(unsigned int next_free, + lhs_rhs_element & elem, + scalar_expression const & t) + { + elem.type_family = COMPOSITE_OPERATION_FAMILY; + elem.subtype = INVALID_SUBTYPE; + elem.node_index = next_free; + return add_node(next_free, next_free + 1, t); + } + + template + unsigned int add_element(unsigned int next_free, + lhs_rhs_element & elem, + vector_expression const & t) + { + elem.type_family = COMPOSITE_OPERATION_FAMILY; + elem.subtype = INVALID_SUBTYPE; + elem.node_index = next_free; + return add_node(next_free, next_free + 1, t); + } + + template + unsigned int add_element(unsigned int next_free, + lhs_rhs_element & elem, + matrix_expression const & t) + { + elem.type_family = COMPOSITE_OPERATION_FAMILY; + elem.subtype = INVALID_SUBTYPE; + elem.numeric_t = INVALID_NUMERIC_TYPE; + elem.node_index = next_free; + return add_node(next_free, next_free + 1, t); + } + + template + unsigned int add_lhs(unsigned int current_index, unsigned int next_free, T const & t) + { return add_element(next_free, array_[current_index].lhs, t); } + + template + unsigned int add_rhs(unsigned int current_index, unsigned int next_free, T const & t) + { return add_element(next_free, array_[current_index].rhs, t); } + + template class ExpressionT, typename LHS, typename RHS, typename OP> + unsigned int add_node(unsigned int current_index, unsigned int next_free, ExpressionT const & proxy) + { + // set OP: + array_[current_index].op.type_family = operation_node_type_family(result_of::op_type_info::family); + array_[current_index].op.type = operation_node_type(result_of::op_type_info::id); + + // set LHS and RHS: + if (array_[current_index].op.type_family == OPERATION_UNARY_TYPE_FAMILY) + { + // unary expression: set rhs to invalid: + array_[current_index].rhs.type_family = INVALID_TYPE_FAMILY; + array_[current_index].rhs.subtype = INVALID_SUBTYPE; + array_[current_index].rhs.numeric_t = INVALID_NUMERIC_TYPE; + return add_lhs(current_index, next_free, proxy.lhs()); + } + + return add_rhs(current_index, add_lhs(current_index, next_free, proxy.lhs()), proxy.rhs()); + } + + container_type array_; +}; + +class statements_container +{ +public: + typedef std::list data_type; + enum order_type { SEQUENTIAL, INDEPENDENT }; + + statements_container(data_type const & data, order_type order) : data_(data), order_(order) + { } + + statements_container(scheduler::statement const & s0) : order_(INDEPENDENT) + { + data_.push_back(s0); + } + + statements_container(scheduler::statement const & s0, scheduler::statement const & s1, order_type order) : order_(order) + { + data_.push_back(s0); + data_.push_back(s1); + } + + std::list const & data() const { return data_; } + order_type order() const { return order_; } +private: + std::list data_; + order_type order_; +}; + +} // namespace scheduler +} // namespace viennacl + +#endif + diff --git a/atidlas/tools/enable_if.hpp b/atidlas/tools/enable_if.hpp new file mode 100644 index 000000000..6c37b4ccb --- /dev/null +++ b/atidlas/tools/enable_if.hpp @@ -0,0 +1,43 @@ +#ifndef VIENNACL_META_ENABLE_IF_HPP_ +#define VIENNACL_META_ENABLE_IF_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp rupp@iue.tuwien.ac.at + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/meta/enable_if.hpp + @brief Simple enable-if variant that uses the SFINAE pattern +*/ + +namespace viennacl +{ + +/** @brief Simple enable-if variant that uses the SFINAE pattern */ +template +struct enable_if +{ + typedef T type; +}; + +/** \cond */ +template +struct enable_if {}; +/** \endcond */ + +} //namespace viennacl + + +#endif diff --git a/atidlas/tools/execution_handler.hpp b/atidlas/tools/execution_handler.hpp deleted file mode 100644 index 5bb280fa7..000000000 --- a/atidlas/tools/execution_handler.hpp +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef ATIDLAS_EXECUTION_HANDLER_HPP -#define ATIDLAS_EXECUTION_HANDLER_HPP - -#include - -#include "viennacl/tools/shared_ptr.hpp" - -#include "atidlas/tools/lazy_program_compiler.hpp" -#include "atidlas/backend/templates/template_base.hpp" - -namespace atidlas -{ - -class execution_handler -{ -public: - typedef std::map< std::string, tools::shared_ptr > container_type; - -private: - std::string append_prefix(std::string const & str) - { - return "_" + str; - } - - std::string define_extension(std::string const & ext) - { - // Note: On devices without double precision support, 'ext' is an empty string. - return (ext.length() > 1) ? std::string("#pragma OPENCL EXTENSION " + ext + " : enable\n") : std::string("\n"); - } - - void init_program_compiler(std::string const & name, bool force_recompilation) - { - lazy_programs_.push_back(lazy_program_compiler(&ctx_, name, force_recompilation)); - lazy_programs_.back().add(define_extension(device_.double_support_extension())); - } - -public: - execution_handler(std::string const & program_name_base, viennacl::ocl::context & ctx, viennacl::ocl::device const & device, bool force_recompilation = false) : ctx_(ctx), device_(device), program_names_(2), init_done_(false) - { - lazy_programs_.reserve(2); - init_program_compiler(program_name_base + "_0", force_recompilation); - init_program_compiler(program_name_base + "_1", force_recompilation); - } - - void add(std::string const & key, template_base const & T, statements_container const & statements) - { - if (kernels_.insert(container_type::value_type(key, T.clone())).second) - { - std::vector sources = kernels_.at(key)->generate(append_prefix(key), statements, device_); - assert(sources.size()<=2); - for (unsigned int i = 0; i < sources.size(); ++i) - lazy_programs_[i].add(sources[i]); - } - } - - template_base * template_of(std::string const & key) - { - return kernels_.at(key).get(); - } - - void execute(container_type::key_type const & key, statements_container const & statements) - { - tools::shared_ptr & template_pointer = kernels_.at(key); - template_pointer->enqueue(append_prefix(key), lazy_programs_, statements); - } - -private: - viennacl::ocl::context & ctx_; - viennacl::ocl::device const & device_; - container_type kernels_; - std::vector program_names_; - std::vector lazy_programs_; - bool init_done_; -}; - -} -#endif diff --git a/atidlas/tools/lazy_program_compiler.hpp b/atidlas/tools/lazy_program_compiler.hpp index c980ed389..d34348474 100644 --- a/atidlas/tools/lazy_program_compiler.hpp +++ b/atidlas/tools/lazy_program_compiler.hpp @@ -2,7 +2,7 @@ #define ATIDLAS_LAZY_PROGRAM_COMPILER_HPP #include -#include "viennacl/ocl/context.hpp" +#include namespace atidlas { @@ -11,29 +11,29 @@ namespace atidlas { public: - lazy_program_compiler(viennacl::ocl::context * ctx, std::string const & name, std::string const & src, bool force_recompilation) : ctx_(ctx), program_(NULL), name_(name), src_(src), force_recompilation_(force_recompilation){ } - lazy_program_compiler(viennacl::ocl::context * ctx, std::string const & name, bool force_recompilation) : ctx_(ctx), program_(NULL), name_(name), force_recompilation_(force_recompilation){ } + lazy_program_compiler(cl::Context * ctx, std::string const & name, std::string const & src, bool force_recompilation) : ctx_(ctx), program_(NULL), name_(name), src_(src), force_recompilation_(force_recompilation){ } + lazy_program_compiler(cl::Context * ctx, std::string const & name, bool force_recompilation) : ctx_(ctx), program_(NULL), name_(name), force_recompilation_(force_recompilation){ } void add(std::string const & src) { src_+=src; } std::string const & src() const { return src_; } - viennacl::ocl::program & program() + cl::Program & program() { - if(program_==NULL) - { - if (force_recompilation_ && ctx_->has_program(name_)) - ctx_->delete_program(name_); - if (!ctx_->has_program(name_)) - ctx_->add_program(src_, name_); - program_ = &ctx_->get_program(name_); - } - return *program_; +// if(program_==NULL) +// { +// if (force_recompilation_ && ctx_->has_program(name_)) +// ctx_->delete_program(name_); +// if (!ctx_->has_program(name_)) +// ctx_->add_program(src_, name_); +// program_ = &ctx_->get_program(name_); +// } +// return *program_; } private: - viennacl::ocl::context * ctx_; - viennacl::ocl::program * program_; + cl::Context * ctx_; + cl::Program * program_; std::string name_; std::string src_; bool force_recompilation_; diff --git a/atidlas/tools/predicate.hpp b/atidlas/tools/predicate.hpp new file mode 100644 index 000000000..0afd7d2bd --- /dev/null +++ b/atidlas/tools/predicate.hpp @@ -0,0 +1,47 @@ +#ifndef ATIDLAS_PREDICATE_HPP_ +#define ATIDLAS_PREDICATE_HPP_ + +namespace atidlas +{ + +/** @brief Helper class for checking whether a type is a primitive type. */ +template +struct is_primitive_type{ enum {value = false}; }; + +template<> struct is_primitive_type { enum { value = true }; }; +template<> struct is_primitive_type { enum { value = true }; }; +template<> struct is_primitive_type { enum { value = true }; }; +template<> struct is_primitive_type { enum { value = true }; }; +template<> struct is_primitive_type { enum { value = true }; }; +template<> struct is_primitive_type { enum { value = true }; }; +template<> struct is_primitive_type { enum { value = true }; }; +template<> struct is_primitive_type { enum { value = true }; }; +template<> struct is_primitive_type{ enum { value = true }; }; +template<> struct is_primitive_type { enum { value = true }; }; + + +/** @brief Helper class for checking whether a particular type is a native OpenCL type. */ +template +struct is_cl_type{ enum { value = false }; }; + +template<> struct is_cl_type { enum { value = true }; }; +template<> struct is_cl_type{ enum { value = true }; }; +template<> struct is_cl_type { enum { value = true }; }; +template<> struct is_cl_type { enum { value = true }; }; +template<> struct is_cl_type { enum { value = true }; }; +template<> struct is_cl_type { enum { value = true }; }; +template<> struct is_cl_type { enum { value = true }; }; +template<> struct is_cl_type { enum { value = true }; }; +template<> struct is_cl_type{ enum { value = true }; }; +template<> struct is_cl_type { enum { value = true }; }; + +/** @brief Helper class for checking whether a particular type is a floating point type. */ +template struct is_floating_point { enum { value = false }; }; +template<> struct is_floating_point { enum { value = true }; }; +template<> struct is_floating_point { enum { value = true }; }; + + +} + + +#endif diff --git a/atidlas/tools/timer.hpp b/atidlas/tools/timer.hpp new file mode 100644 index 000000000..6f81e19f5 --- /dev/null +++ b/atidlas/tools/timer.hpp @@ -0,0 +1,123 @@ +#ifndef _VIENNACL_TOOLS_TIMER_HPP_ +#define _VIENNACL_TOOLS_TIMER_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp rupp@iue.tuwien.ac.at + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/tools/timer.hpp + @brief A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling. */ + +#include + + +#ifdef _WIN32 + +#define WINDOWS_LEAN_AND_MEAN +#include +#undef min +#undef max + +namespace viennacl +{ +namespace tools +{ + +/** @brief Simple timer class based on gettimeofday (POSIX) or QueryPerformanceCounter (Windows). + * + * Avoids messing with Boost and should be sufficient for benchmarking purposes. + */ +class timer +{ +public: + + timer() + { + QueryPerformanceFrequency(&freq); + } + + void start() + { + QueryPerformanceCounter((LARGE_INTEGER*) &start_time); + } + + double get() const + { + LARGE_INTEGER elapsed; + QueryPerformanceCounter((LARGE_INTEGER*) &end_time); + elapsed.QuadPart = end_time.QuadPart - start_time.QuadPart; + return elapsed.QuadPart / static_cast(freq.QuadPart); + } + + +private: + LARGE_INTEGER freq; + LARGE_INTEGER start_time; + LARGE_INTEGER end_time; +}; + +} + +} + +#else + +#include + +namespace viennacl +{ +namespace tools +{ + +/** @brief Simple timer class based on gettimeofday (POSIX) or QueryPerformanceCounter (Windows). + * + * Avoids messing with Boost and should be sufficient for benchmarking purposes. + */ +class timer +{ +public: + + timer() : ts(0) + {} + + void start() + { + struct timeval tval; + gettimeofday(&tval, NULL); + ts = static_cast(tval.tv_sec * 1000000 + tval.tv_usec); + } + + double get() const + { + struct timeval tval; + gettimeofday(&tval, NULL); + double end_time = static_cast(tval.tv_sec * 1000000 + tval.tv_usec); + + return static_cast(end_time-ts) / 1000000.0; + } + +private: + double ts; +}; + +} +} + + + +#endif +#endif diff --git a/atidlas/traits/size.hpp b/atidlas/traits/size.hpp new file mode 100644 index 000000000..779f89560 --- /dev/null +++ b/atidlas/traits/size.hpp @@ -0,0 +1,43 @@ +#ifndef ATIDLAS_TRAITS_SIZE_HPP_ +#define ATIDLAS_TRAITS_SIZE_HPP_ + +#include "atidlas/forwards.h" +#include "atidlas/tools/predicate.hpp" +#include + +namespace atidlas +{ +namespace traits +{ + +template +atidlas_int_t size(vector_expression const & proxy) +{ + int k = proxy.rhs(); + int A_size1 = static_cast(size1(proxy.lhs())); + int A_size2 = static_cast(size2(proxy.lhs())); + + int row_depth = std::min(A_size1, A_size1 + k); + int col_depth = std::min(A_size2, A_size2 - k); + + return atidlas_int_t(std::min(row_depth, col_depth)); +} + +template +atidlas_int_t size(vector_expression const & proxy) +{ return size2(proxy.lhs());} + +template +atidlas_int_t size(vector_expression const & proxy) +{ return size1(proxy.lhs());} + +inline atidlas_int_t size(vector_base const & x) +{ return x.size(); } + + + +} +} + + +#endif diff --git a/atidlas/vector.hpp b/atidlas/vector.hpp new file mode 100644 index 000000000..41609bf58 --- /dev/null +++ b/atidlas/vector.hpp @@ -0,0 +1,74 @@ +#ifndef ATIDLAS_VECTOR_H +#define ATIDLAS_VECTOR_H + +#include + +#include "atidlas/forwards.h" +#include "atidlas/scheduler/forwards.h" +#include "atidlas/expression_template.hpp" + +namespace atidlas +{ + +class vector : public vector_base +{ +public: + vector(atidlas_int_t size, numeric_type dtype, cl::Context context) : vector_base(size, dtype, context){} + + template + vector & operator=(T const & other) + { + vector_base::operator=(other); + return *this; + } +// using vector_base::operator+=; +// using vector_base::operator-=; +}; + +#define ATIDLAS_ADD_BINARY_OPERATOR(TYPE, OP) \ +template \ +TYPE ## _expression< const TYPE ## _expression< XL, XR, XOP>, const TYPE ## _expression< YL, YR, YOP>, OP> \ +operator + (TYPE ## _expression const & x, TYPE ## _expression const & y) \ +{ \ + assert(x.size() == y.size() && bool("Incompatible TYPE sizes!")); \ + return TYPE ## _expression< const TYPE ## _expression, const TYPE ## _expression, OP>(x, y); \ +} \ + \ +template \ +TYPE ## _expression< const TYPE ## _expression< XL, XR, XOP>, const TYPE ## _base, OP> \ +operator + (TYPE ## _expression const & x, TYPE ## _base const & y) \ +{ \ + assert(x.size() == y.size() && bool("Incompatible TYPE sizes!")); \ + return TYPE ## _expression< const TYPE ## _expression, const TYPE ## _base, OP>(x, y); \ +} \ + \ +template \ +TYPE ## _expression< const TYPE ## _expression< YL, YR, YOP>, const TYPE ## _base, OP> \ +operator + (TYPE ## _base const & x, TYPE ## _expression const & y) \ +{ \ + assert(x.size() == y.size() && bool("Incompatible TYPE sizes!")); \ + return TYPE ## _expression, OP>(x, y); \ +} \ + \ +TYPE ## _expression< const TYPE ## _base, const TYPE ## _base, OP> \ +operator + (TYPE ## _base const & x, TYPE ## _base const & y) \ +{ \ + assert(x.size() == y.size() && bool("Incompatible TYPE sizes!")); \ + return TYPE ## _expression(x, y); \ +} + +ATIDLAS_ADD_BINARY_OPERATOR(vector, op_add) + +#undef ATIDLAS_ADD_BINARY_OPERATOR + +template +vector_base & vector_base::operator=(vector_expression const & operation) +{ + scheduler::statement s(*this, op_assign(), operation); + return *this; +} + +} + +#endif