diff --git a/bench/blas.cpp b/bench/blas.cpp index cde5342ed..c6fcb6a54 100644 --- a/bench/blas.cpp +++ b/bench/blas.cpp @@ -33,7 +33,7 @@ void bench(ad::numeric_type dtype) total_time = 0;\ OP;\ ad::cl_ext::synchronize(ad::cl_ext::default_context());\ - while(total_time < 1e-1){\ + while(total_time < 5e-1){\ timer.start(); \ OP;\ SYNC;\ @@ -41,7 +41,7 @@ void bench(ad::numeric_type dtype) total_time += times.back();\ }\ float tres = median(times);\ - std::cout << " " << tres << std::flush;\ + std::cout << " " << PERF << std::flush;\ } #define CL_BENCHMARK(OP, PERF) BENCHMARK(OP, PERF, ad::cl_ext::synchronize(ad::cl_ext::default_context())) @@ -64,7 +64,9 @@ void bench(ad::numeric_type dtype) ad::array x(N, dtype), y(N, dtype); ad::array_expression E = ad::detail::assign(y, x + y); ad::model & model = ad::get_model(ad::cl_ext::get_queue(x.context(), 0), ad::VECTOR_AXPY_TYPE, dtype); - CL_BENCHMARK(model.execute(E), bandwidth(3*N, tres, dtsize)); + ad::model::runtime_options opt("saxpy"); + model.tune(E); + CL_BENCHMARK(model.execute(E, opt), bandwidth(3*N, tres, dtsize)); /* clAmdBlas */ #ifdef BENCH_CLAMDBLAS CL_BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &ad::cl_ext::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize)) diff --git a/bench/common.hpp b/bench/common.hpp index 8889e442e..d23dc83df 100644 --- a/bench/common.hpp +++ b/bench/common.hpp @@ -73,4 +73,14 @@ T median(std::vector x) return x[size / 2]; } +template +T mean(std::vector x) +{ + T res = 0; + int N = x.size(); + for(int i = 0 ; i < N ; ++i) + res += x[i]; + return res/N; +} + #endif diff --git a/include/atidlas/backend/parse.h b/include/atidlas/backend/parse.h index 1cf9ab6b6..e9ea7db4f 100644 --- a/include/atidlas/backend/parse.h +++ b/include/atidlas/backend/parse.h @@ -113,7 +113,7 @@ std::string evaluate(leaf_t leaf, std::map const & acc atidlas::array_expression const & array_expression, int_t root_idx, mapping_type const & mapping); void evaluate(kernel_generation_stream & stream, leaf_t leaf, std::map const & accessors, - array_expressions_container const & array_expressions, std::vector const & mappings); + expressions_tuple const & expressions, std::vector const & mappings); /** @brief functor for fetching or writing-back the elements in a array_expression */ class process_traversal : public traversal_functor @@ -133,7 +133,7 @@ void process(kernel_generation_stream & stream, leaf_t leaf, std::map & already_processed); void process(kernel_generation_stream & stream, leaf_t leaf, std::map const & accessors, - array_expressions_container const & array_expressions, std::vector const & mappings); + expressions_tuple const & expressions, std::vector const & mappings); class array_expression_representation_functor : public traversal_functor{ diff --git a/include/atidlas/backend/templates/base.h b/include/atidlas/backend/templates/base.h index 8f3562727..bd2d2b289 100644 --- a/include/atidlas/backend/templates/base.h +++ b/include/atidlas/backend/templates/base.h @@ -69,7 +69,7 @@ public: protected: - /** @brief Functor to map the array_expressions to the types defined in mapped_objects.hpp */ + /** @brief Functor to map the expressions to the types defined in mapped_objects.hpp */ class map_functor : public traversal_functor { /** @brief Accessor for the numeric type */ @@ -130,9 +130,9 @@ protected: size_t root_idx, leaf_t leaf); static std::string neutral_element(op_element const & op); static std::string generate_arguments(std::vector const & mappings, std::map const & accessors, - array_expressions_container const & array_expressions); + expressions_tuple const & expressions); static std::string generate_arguments(std::string const & data_type, std::vector const & mappings, - array_expressions_container const & array_expressions); + expressions_tuple const & expressions); static void fill_kernel_name(char * ptr, unsigned int label, const char * suffix); static bool is_node_trans(array_expression::container_type const & array, size_t root_idx, leaf_t leaf_type); static std::string append_simd_suffix(std::string const & str, unsigned int i); @@ -147,24 +147,24 @@ protected: static std::string vstore(unsigned int simd_width, std::string const & value, std::string const & offset, std::string const & ptr); static std::string vload(unsigned int simd_width, std::string const & offset, std::string const & ptr); static std::string append_width(std::string const & str, unsigned int width); - static bool requires_fallback(array_expressions_container const & array_expressions); - void set_arguments(array_expressions_container const & array_expressions, cl::Kernel & kernel, unsigned int & current_arg); + static bool requires_fallback(expressions_tuple const & expressions); + void set_arguments(expressions_tuple const & expressions, cl::Kernel & kernel, unsigned int & current_arg); private: - virtual std::vector generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector const & mapping) const = 0; + virtual std::vector generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector const & mapping) const = 0; public: base(binding_policy_t binding_policy); - virtual unsigned int lmem_usage(array_expressions_container const &) const; - virtual unsigned int registers_usage(array_expressions_container const &) const; - virtual std::vector input_sizes(array_expressions_container const & array_expressions) = 0; + virtual unsigned int lmem_usage(expressions_tuple const &) const; + virtual unsigned int registers_usage(expressions_tuple const &) const; + virtual std::vector input_sizes(expressions_tuple const & expressions) = 0; virtual ~base(); - std::vector generate(unsigned int label, array_expressions_container const & array_expressions, cl::Device const & device); - virtual int check_invalid(array_expressions_container const & array_expressions, cl::Device const & device) const = 0; + std::vector generate(unsigned int label, expressions_tuple const & expressions, cl::Device const & device); + virtual int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const = 0; virtual void enqueue(cl::CommandQueue & queue, std::vector & programs, - unsigned int label, array_expressions_container const & array_expressions) = 0; + unsigned int label, expressions_tuple const & expressions) = 0; virtual tools::shared_ptr clone() const = 0; private: binding_policy_t binding_policy_; @@ -175,7 +175,7 @@ template class base_impl : public base { private: - virtual int check_invalid_impl(cl::Device const &, array_expressions_container const &) const; + virtual int check_invalid_impl(cl::Device const &, expressions_tuple const &) const; public: typedef ParametersType parameters_type; base_impl(parameters_type const & parameters, binding_policy_t binding_policy); @@ -183,7 +183,7 @@ public: int_t local_size_1() const; tools::shared_ptr clone() const; /** @brief returns whether or not the profile has undefined behavior on particular device */ - int check_invalid(array_expressions_container const & array_expressions, cl::Device const & device) const; + int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const; protected: parameters_type p_; binding_policy_t binding_policy_; diff --git a/include/atidlas/backend/templates/maxpy.h b/include/atidlas/backend/templates/maxpy.h index 9b3271a79..c11b5af1e 100644 --- a/include/atidlas/backend/templates/maxpy.h +++ b/include/atidlas/backend/templates/maxpy.h @@ -20,14 +20,14 @@ public: class maxpy : public base_impl { private: - int check_invalid_impl(cl::Device const &, array_expressions_container const &) const; - std::string generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector const & mappings, unsigned int simd_width) const; - std::vector generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector const & mappings) const; + int check_invalid_impl(cl::Device const &, expressions_tuple const &) const; + std::string generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector const & mappings, unsigned int simd_width) const; + std::vector generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector const & mappings) const; public: maxpy(parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE); maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE); - std::vector input_sizes(array_expressions_container const & array_expressions); - void enqueue(cl::CommandQueue & queue, std::vector & programs, unsigned int label, array_expressions_container const & array_expressions); + std::vector input_sizes(expressions_tuple const & expressions); + void enqueue(cl::CommandQueue & queue, std::vector & programs, unsigned int label, expressions_tuple const & expressions); }; } diff --git a/include/atidlas/backend/templates/mproduct.h b/include/atidlas/backend/templates/mproduct.h index bff8fee2a..86b9476c0 100644 --- a/include/atidlas/backend/templates/mproduct.h +++ b/include/atidlas/backend/templates/mproduct.h @@ -33,25 +33,25 @@ struct mproduct_parameters : public base::parameters_type class mproduct : public base_impl { private: - unsigned int lmem_usage(array_expressions_container const & array_expressions) const; - unsigned int registers_usage(array_expressions_container const & array_expressions) const; - int check_invalid_impl(cl::Device const &, array_expressions_container const &) const; - std::string generate_impl(unsigned int label, const char * id, const array_expressions_container &array_expressions, const std::vector &, bool fallback) const; - std::vector generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector const & mappings) const; + unsigned int lmem_usage(expressions_tuple const & expressions) const; + unsigned int registers_usage(expressions_tuple const & expressions) const; + int check_invalid_impl(cl::Device const &, expressions_tuple const &) const; + std::string generate_impl(unsigned int label, const char * id, const expressions_tuple &expressions, const std::vector &, bool fallback) const; + std::vector generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector const & mappings) const; void enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K, array_infos const & A, array_infos const & B, array_infos const & C, value_scalar const & alpha, value_scalar const & beta, std::vector & programs, unsigned int label, int id); array_infos create_slice(array_infos & M, int_t s0_0, int_t s0_1, int_t s1_0, int_t s1_1, bool swap); - std::vector infos(array_expressions_container const & array_expressions, + std::vector infos(expressions_tuple const & expressions, lhs_rhs_element & C, lhs_rhs_element & A, lhs_rhs_element & B); public: mproduct(mproduct::parameters_type const & parameters, char A_trans, char B_trans); - std::vector input_sizes(array_expressions_container const & array_expressions); + std::vector input_sizes(expressions_tuple const & expressions); void enqueue(cl::CommandQueue & queue, std::vector & programs, unsigned int label, - array_expressions_container const & array_expressions); + expressions_tuple const & expressions); private: const char A_trans_; diff --git a/include/atidlas/backend/templates/mreduction.h b/include/atidlas/backend/templates/mreduction.h index 8f201a6ac..5b9aa3648 100644 --- a/include/atidlas/backend/templates/mreduction.h +++ b/include/atidlas/backend/templates/mreduction.h @@ -29,13 +29,13 @@ protected: }; mreduction(mreduction::parameters_type const & , reduction_type, binding_policy_t); private: - virtual int check_invalid_impl(cl::Device const &, array_expressions_container const &) const; + virtual int check_invalid_impl(cl::Device const &, expressions_tuple const &) const; unsigned int lmem_usage() const; - std::string generate_impl(unsigned int, array_expressions_container const &, std::vector const &, unsigned int, std::vector const &) const; - std::vector generate_impl(unsigned int, array_expressions_container const &, std::vector const &) const; + std::string generate_impl(unsigned int, expressions_tuple const &, std::vector const &, unsigned int, std::vector const &) const; + std::vector generate_impl(unsigned int, expressions_tuple const &, std::vector const &) const; public: - virtual std::vector input_sizes(array_expressions_container const & array_expressions); - void enqueue(cl::CommandQueue & queue,std::vector & programs,unsigned int label, array_expressions_container const & array_expressions); + virtual std::vector input_sizes(expressions_tuple const & expressions); + void enqueue(cl::CommandQueue & queue,std::vector & programs,unsigned int label, expressions_tuple const & expressions); private: reduction_type reduction_type_; }; diff --git a/include/atidlas/backend/templates/reduction.h b/include/atidlas/backend/templates/reduction.h index e25f7edf9..65ecf6500 100644 --- a/include/atidlas/backend/templates/reduction.h +++ b/include/atidlas/backend/templates/reduction.h @@ -18,21 +18,21 @@ struct reduction_parameters : public base::parameters_type class reduction : public base_impl { private: - unsigned int lmem_usage(array_expressions_container const & array_expressions) const; - int check_invalid_impl(cl::Device const &, array_expressions_container const &) const; + unsigned int lmem_usage(expressions_tuple const & expressions) const; + int check_invalid_impl(cl::Device const &, expressions_tuple const &) const; inline void reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector exprs, std::string const & buf_str, std::string const & buf_value_str) const; - std::string generate_impl(unsigned int label, const char * type, array_expressions_container const & array_expressions, std::vector const & mappings, unsigned int simd_width) const; - std::vector generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector const & mappings) const; + std::string generate_impl(unsigned int label, const char * type, expressions_tuple const & expressions, std::vector const & mappings, unsigned int simd_width) const; + std::vector generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector const & mappings) const; public: reduction(reduction::parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE); reduction(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE); - std::vector input_sizes(array_expressions_container const & array_expressions); + std::vector input_sizes(expressions_tuple const & expressions); void enqueue(cl::CommandQueue & queue, std::vector & programs, unsigned int label, - array_expressions_container const & array_expressions); + expressions_tuple const & expressions); private: std::vector< cl::Buffer > tmp_; std::vector< cl::Buffer > tmpidx_; diff --git a/include/atidlas/backend/templates/vaxpy.h b/include/atidlas/backend/templates/vaxpy.h index ca4f97166..be04477c9 100644 --- a/include/atidlas/backend/templates/vaxpy.h +++ b/include/atidlas/backend/templates/vaxpy.h @@ -17,14 +17,14 @@ public: class vaxpy : public base_impl { private: - virtual int check_invalid_impl(cl::Device const &, array_expressions_container const &) const; - std::vector generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector const & mappings) const; + virtual int check_invalid_impl(cl::Device const &, expressions_tuple const &) const; + std::vector generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector const & mappings) const; public: vaxpy(vaxpy::parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE); vaxpy(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, binding_policy_t binding_policy = BIND_ALL_UNIQUE); - std::vector input_sizes(array_expressions_container const & array_expressions); + std::vector input_sizes(expressions_tuple const & expressions); void enqueue(cl::CommandQueue & queue, std::vector & programs, - unsigned int label, array_expressions_container const & array_expressions); + unsigned int label, expressions_tuple const & expressions); }; } diff --git a/include/atidlas/model/model.h b/include/atidlas/model/model.h index 4f7bd8221..0448fe7ba 100644 --- a/include/atidlas/model/model.h +++ b/include/atidlas/model/model.h @@ -17,19 +17,29 @@ namespace atidlas class model { typedef std::vector< tools::shared_ptr > templates_container; + public: + struct runtime_options + { + runtime_options() : label(-1), recompile(false){} + runtime_options(std::string const & p) : program_name(p), label(-1), recompile(false){} + + std::string program_name; + int label; + bool recompile; + }; private: std::string define_extension(std::string const & extensions, std::string const & ext); - inline void fill_program_name(char* program_name, array_expressions_container const & array_expressions, binding_policy_t binding_policy); - std::vector& init(array_expressions_container const & array_expressions, cl::Context const & context, cl::Device const & device, bool force_recompilation); + inline void fill_program_name(char* program_name, expressions_tuple const & expressions, binding_policy_t binding_policy); + std::vector& init(expressions_tuple const & expressions, runtime_options const & opt = runtime_options()); public: model(predictors::random_forest const &, std::vector< tools::shared_ptr > const &, cl::CommandQueue &); model(std::vector< tools::shared_ptr > const &, cl::CommandQueue &); model(base const &, cl::CommandQueue &); - void execute(array_expressions_container const &, bool bypass_predictor = false, bool force_recompilation = false); - void tune(array_expressions_container const &); + void execute(expressions_tuple const &, runtime_options const & opt = runtime_options()); + void tune(expressions_tuple const &); templates_container const & templates() const; private: diff --git a/include/atidlas/model/predictors/random_forest.h b/include/atidlas/model/predictors/random_forest.h index 7c55ce506..07e2edb92 100644 --- a/include/atidlas/model/predictors/random_forest.h +++ b/include/atidlas/model/predictors/random_forest.h @@ -38,6 +38,7 @@ public: std::vector const & estimators() const; private: std::vector estimators_; + int_t D_; }; } diff --git a/include/atidlas/symbolic/expression.h b/include/atidlas/symbolic/expression.h index 080ae56d4..3b566bd57 100644 --- a/include/atidlas/symbolic/expression.h +++ b/include/atidlas/symbolic/expression.h @@ -208,7 +208,7 @@ private: size4 shape_; }; -class array_expressions_container +class expressions_tuple { private: tools::shared_ptr create(array_expression const & s); @@ -216,9 +216,9 @@ public: typedef std::list > data_type; enum order_type { SEQUENTIAL, INDEPENDENT }; - array_expressions_container(array_expression const & s0); - array_expressions_container(order_type order, array_expression const & s0, array_expression const & s1); - array_expressions_container(data_type const & data, order_type order); + expressions_tuple(array_expression const & s0); + expressions_tuple(order_type order, array_expression const & s0, array_expression const & s1); + expressions_tuple(data_type const & data, order_type order); data_type const & data() const; cl::Context const & context() const; diff --git a/lib/backend/parse.cpp b/lib/backend/parse.cpp index c996f058b..556cc9ebf 100644 --- a/lib/backend/parse.cpp +++ b/lib/backend/parse.cpp @@ -303,12 +303,12 @@ std::string evaluate(leaf_t leaf, std::map const & acc } void evaluate(kernel_generation_stream & stream, leaf_t leaf, std::map const & accessors, - array_expressions_container const & array_expressions, std::vector const & mappings) + expressions_tuple const & expressions, std::vector const & mappings) { - array_expressions_container::data_type::const_iterator sit; + expressions_tuple::data_type::const_iterator sit; std::vector::const_iterator mit; - for (mit = mappings.begin(), sit = array_expressions.data().begin(); sit != array_expressions.data().end(); ++mit, ++sit) + for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++mit, ++sit) stream << evaluate(leaf, accessors, **sit, (*sit)->root(), *mit) << ";" << std::endl; } @@ -368,13 +368,13 @@ void process(kernel_generation_stream & stream, leaf_t leaf, std::map const & accessors, - array_expressions_container const & array_expressions, std::vector const & mappings) + expressions_tuple const & expressions, std::vector const & mappings) { - array_expressions_container::data_type::const_iterator sit; + expressions_tuple::data_type::const_iterator sit; std::vector::const_iterator mit; std::set already_processed; - for (mit = mappings.begin(), sit = array_expressions.data().begin(); sit != array_expressions.data().end(); ++mit, ++sit) + for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++mit, ++sit) process(stream, leaf, accessors, **sit, (*sit)->root(), *mit, already_processed); } diff --git a/lib/backend/templates/base.cpp b/lib/backend/templates/base.cpp index 029681077..87bd8864c 100644 --- a/lib/backend/templates/base.cpp +++ b/lib/backend/templates/base.cpp @@ -258,30 +258,30 @@ std::string base::neutral_element(op_element const & op) } } -std::string base::generate_arguments(std::vector const & mappings, std::map const & accessors, array_expressions_container const & array_expressions) +std::string base::generate_arguments(std::vector const & mappings, std::map const & accessors, expressions_tuple const & expressions) { kernel_generation_stream stream; - process(stream, PARENT_NODE_TYPE, accessors, array_expressions, mappings); + process(stream, PARENT_NODE_TYPE, accessors, expressions, mappings); std::string res = stream.str(); res.erase(res.rfind(',')); return res; } -std::string base::generate_arguments(std::string const & data_type, std::vector const & mappings, array_expressions_container const & array_expressions) +std::string base::generate_arguments(std::string const & data_type, std::vector const & mappings, expressions_tuple const & expressions) { return generate_arguments(mappings, tools::make_map >("array0", "__global #scalartype* #pointer, uint #start,") ("host_scalar", "#scalartype #name,") ("array1", "__global " + data_type + "* #pointer, uint #start, uint #stride,") ("array2", "__global " + data_type + "* #pointer, uint #ld, uint #start1, uint #start2, uint #stride1, uint #stride2,") - ("tuple4", "#scalartype #name0, #scalartype #name1, #scalartype #name2, #scalartype #name3,"), array_expressions); + ("tuple4", "#scalartype #name0, #scalartype #name1, #scalartype #name2, #scalartype #name3,"), expressions); } -void base::set_arguments(array_expressions_container const & array_expressions, cl::Kernel & kernel, unsigned int & current_arg) +void base::set_arguments(expressions_tuple const & expressions, cl::Kernel & kernel, unsigned int & current_arg) { tools::shared_ptr binder = make_binder(); - for (array_expressions_container::data_type::const_iterator itt = array_expressions.data().begin(); itt != array_expressions.data().end(); ++itt) + for (expressions_tuple::data_type::const_iterator itt = expressions.data().begin(); itt != expressions.data().end(); ++itt) traverse(**itt, (*itt)->root(), set_arguments_functor(*binder, current_arg, kernel), true); } @@ -370,9 +370,9 @@ bool base::is_strided(array_expression::node const & node) || node.op.type==OPERATOR_OUTER_PROD_TYPE; } -bool base::requires_fallback(array_expressions_container const & array_expressions) +bool base::requires_fallback(expressions_tuple const & expressions) { - for (array_expressions_container::data_type::const_iterator it = array_expressions.data().begin(); it != array_expressions.data().end(); ++it) + for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it) for(array_expression::container_type::const_iterator itt = (*it)->tree().begin(); itt != (*it)->tree().end() ; ++itt) if( (itt->lhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->lhs.array.stride1, itt->lhs.array.stride2)>1 || std::max(itt->lhs.array.start1,itt->lhs.array.start2)>0)) || (itt->rhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->rhs.array.stride1, itt->rhs.array.stride2)>1 || std::max(itt->rhs.array.start1,itt->rhs.array.start2)>0))) @@ -490,34 +490,34 @@ tools::shared_ptr base::make_binder() base::base(binding_policy_t binding_policy) : binding_policy_(binding_policy) {} -unsigned int base::lmem_usage(array_expressions_container const &) const +unsigned int base::lmem_usage(expressions_tuple const &) const { return 0; } -unsigned int base::registers_usage(array_expressions_container const &) const +unsigned int base::registers_usage(expressions_tuple const &) const { return 0; } base::~base() { } -std::vector base::generate(unsigned int label, array_expressions_container const & array_expressions, cl::Device const & device) +std::vector base::generate(unsigned int label, expressions_tuple const & expressions, cl::Device const & device) { - array_expressions_container::data_type::const_iterator sit; + expressions_tuple::data_type::const_iterator sit; std::vector::iterator mit; - if(int err = check_invalid(array_expressions, device)) + if(int err = check_invalid(expressions, device)) throw operation_not_supported_exception("The supplied parameters for this template are invalid : err " + tools::to_string(err)); //Create mapping - std::vector mappings(array_expressions.data().size()); + std::vector mappings(expressions.data().size()); tools::shared_ptr binder = make_binder(); - for (mit = mappings.begin(), sit = array_expressions.data().begin(); sit != array_expressions.data().end(); ++sit, ++mit) + for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++sit, ++mit) traverse(**sit, (*sit)->root(), map_functor(*binder,*mit), true); - return generate_impl(label, array_expressions, mappings); + return generate_impl(label, expressions, mappings); } template -int base_impl::check_invalid_impl(cl::Device const &, array_expressions_container const &) const +int base_impl::check_invalid_impl(cl::Device const &, expressions_tuple const &) const { return TEMPLATE_VALID; } template @@ -537,11 +537,11 @@ tools::shared_ptr base_impl::clone() const { return tools::shared_ptr(new TType(*dynamic_cast(this))); } template -int base_impl::check_invalid(array_expressions_container const & array_expressions, cl::Device const & device) const +int base_impl::check_invalid(expressions_tuple const & expressions, cl::Device const & device) const { //Query device informations size_t lmem_available = device.getInfo(); - size_t lmem_used = lmem_usage(array_expressions); + size_t lmem_used = lmem_usage(expressions); if (lmem_used>lmem_available) return TEMPLATE_LOCAL_MEMORY_OVERFLOW; @@ -575,7 +575,7 @@ int base_impl::check_invalid(array_expressions_container const & a p_.simd_width!=16) return TEMPLATE_INVALID_SIMD_WIDTH; - return check_invalid_impl(device, array_expressions); + return check_invalid_impl(device, expressions); } template class base_impl; diff --git a/lib/backend/templates/maxpy.cpp b/lib/backend/templates/maxpy.cpp index 431bf55d3..e37311d9b 100644 --- a/lib/backend/templates/maxpy.cpp +++ b/lib/backend/templates/maxpy.cpp @@ -14,7 +14,7 @@ maxpy_parameters::maxpy_parameters(unsigned int _simd_width, -int maxpy::check_invalid_impl(cl::Device const &, array_expressions_container const &) const +int maxpy::check_invalid_impl(cl::Device const &, expressions_tuple const &) const { if (p_.simd_width>1) return TEMPLATE_INVALID_SIMD_WIDTH; @@ -23,7 +23,7 @@ int maxpy::check_invalid_impl(cl::Device const &, array_expressions_container co return TEMPLATE_VALID; } -std::string maxpy::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector const & mappings, unsigned int simd_width) const +std::string maxpy::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector const & mappings, unsigned int simd_width) const { kernel_generation_stream stream; @@ -33,13 +33,13 @@ std::string maxpy::generate_impl(unsigned int label, array_expressions_container fill_kernel_name(kprefix, label, "d"); stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; - stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, array_expressions) << ")" << std::endl; + stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, expressions) << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); process(stream, PARENT_NODE_TYPE, tools::make_map >("array0", "#scalartype #namereg = #pointer[#start];") ("array1", "#pointer += #start;") - ("array2", "#pointer = &$VALUE{#start1, #start2};"), array_expressions, mappings); + ("array2", "#pointer = &$VALUE{#start1, #start2};"), expressions, mappings); fetching_loop_info(p_.fetching_policy, "M", stream, init0, upper_bound0, inc0, "get_global_id(0)", "get_global_size(0)"); stream << "for(unsigned int i = " << init0 << "; i < " << upper_bound0 << "; i += " << inc0 << ")" << std::endl; @@ -55,7 +55,7 @@ std::string maxpy::generate_impl(unsigned int label, array_expressions_container ("vdiag", "#scalartype #namereg = ((i + ((#diag_offset<0)?#diag_offset:0))!=(j-((#diag_offset>0)?#diag_offset:0)))?0:$VALUE{min(i*#stride1, j*#stride1)};") ("repeat", "#scalartype #namereg = $VALUE{(i%#tuplearg0)*#stride1, (j%#tuplearg1)*#stride2};") ("outer", "#scalartype #namereg = ($LVALUE{i*#stride})*($RVALUE{j*#stride});") - , array_expressions, mappings); + , expressions, mappings); evaluate(stream, PARENT_NODE_TYPE, tools::make_map > ("array2", "#namereg") @@ -64,10 +64,10 @@ std::string maxpy::generate_impl(unsigned int label, array_expressions_container ("array0", "#namereg") ("outer", "#namereg") ("cast", "convert_"+data_type) - , array_expressions, mappings); + , expressions, mappings); process(stream, LHS_NODE_TYPE, tools::make_map >("array2", "$VALUE{i*#stride1,j*#stride2} = #namereg;") - , array_expressions, mappings); + , expressions, mappings); stream.dec_tab(); stream << "}" << std::endl; @@ -81,10 +81,10 @@ std::string maxpy::generate_impl(unsigned int label, array_expressions_container return stream.str(); } -std::vector maxpy::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector const & mappings) const +std::vector maxpy::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector const & mappings) const { std::vector res; - res.push_back(generate_impl(label, array_expressions, mappings, 1)); + res.push_back(generate_impl(label, expressions, mappings, 1)); return res; } @@ -97,9 +97,9 @@ maxpy::maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2, base_impl(maxpy_parameters(simd, ls1, ls2, ng1, ng2, fetch), bind) {} -std::vector maxpy::input_sizes(array_expressions_container const & array_expressions) +std::vector maxpy::input_sizes(expressions_tuple const & expressions) { - atidlas::array_expression const & array_expression = *(array_expressions.data().front()); + atidlas::array_expression const & array_expression = *(expressions.data().front()); std::pair size = matrix_size(lhs_most(array_expression.tree(), array_expression.root())); return tools::make_vector() << size.first << size.second; } @@ -107,7 +107,7 @@ std::vector maxpy::input_sizes(array_expressions_container const & array_ void maxpy::enqueue(cl::CommandQueue & queue, std::vector & programs, unsigned int label, - array_expressions_container const & array_expressions) + expressions_tuple const & expressions) { char kname[10]; fill_kernel_name(kname, label, "d"); @@ -116,10 +116,10 @@ void maxpy::enqueue(cl::CommandQueue & queue, cl::NDRange grange(p_.local_size_0*p_.num_groups_0, p_.local_size_1*p_.num_groups_1); cl::NDRange lrange(p_.local_size_0, p_.local_size_1); unsigned int current_arg = 0; - std::vector MN = input_sizes(array_expressions); + std::vector MN = input_sizes(expressions); kernel.setArg(current_arg++, cl_uint(MN[0])); kernel.setArg(current_arg++, cl_uint(MN[1])); - set_arguments(array_expressions, kernel, current_arg); + set_arguments(expressions, kernel, current_arg); queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange); } diff --git a/lib/backend/templates/mproduct.cpp b/lib/backend/templates/mproduct.cpp index fb7ef19e1..b4531e4eb 100644 --- a/lib/backend/templates/mproduct.cpp +++ b/lib/backend/templates/mproduct.cpp @@ -17,9 +17,9 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width mL(ms*local_size_0), nL(ns*local_size_1){} - unsigned int mproduct::lmem_usage(array_expressions_container const & array_expressions) const + unsigned int mproduct::lmem_usage(expressions_tuple const & expressions) const { - atidlas::array_expression const & array_expression = (*array_expressions.data().front()); + atidlas::array_expression const & array_expression = (*expressions.data().front()); numeric_type numeric_t = lhs_most(array_expression.tree(), array_expression.root()).lhs.dtype; unsigned int N = 0; @@ -30,16 +30,16 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width return N*size_of(numeric_t); } - unsigned int mproduct::registers_usage(array_expressions_container const & array_expressions) const + unsigned int mproduct::registers_usage(expressions_tuple const & expressions) const { - atidlas::array_expression const & array_expression = (*array_expressions.data().front()); + atidlas::array_expression const & array_expression = (*expressions.data().front()); numeric_type numeric_t = lhs_most(array_expression.tree(), array_expression.root()).lhs.dtype; unsigned int N = p_.mS * p_.nS + p_.mS * p_.kS + p_.kS * p_.nS; return N*size_of(numeric_t); } - int mproduct::check_invalid_impl(cl::Device const &, array_expressions_container const &) const + int mproduct::check_invalid_impl(cl::Device const &, expressions_tuple const &) const { if (p_.A_fetching_policy!=FETCH_FROM_LOCAL && p_.B_fetching_policy!=FETCH_FROM_LOCAL&& (p_.local_fetch_0!=0 || p_.local_fetch_1!=0)) return TEMPLATE_GLOBAL_MEMORY_REQUIRES_ZERO_LOCAL_FETCH; @@ -87,7 +87,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width return TEMPLATE_VALID; } - std::string mproduct::generate_impl(unsigned int label, const char * id, const array_expressions_container &array_expressions, const std::vector &, bool fallback) const + std::string mproduct::generate_impl(unsigned int label, const char * id, const expressions_tuple &expressions, const std::vector &, bool fallback) const { using std::string; using tools::to_string; @@ -106,7 +106,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width /// INIT /// ////////////// kernel_generation_stream stream; - array_expression const & st = (*array_expressions.data().front()); + array_expression const & st = (*expressions.data().front()); numeric_type dtype = lhs_most(st.tree(), st.root()).lhs.dtype; std::string dtypestr = numeric_type_to_string(dtype); @@ -557,11 +557,11 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width #undef VST0RE } - std::vector mproduct::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector const & mappings) const + std::vector mproduct::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector const & mappings) const { std::vector res; - res.push_back(generate_impl(label, "o", array_expressions, mappings, false)); - res.push_back(generate_impl(label, "f", array_expressions, mappings, true)); + res.push_back(generate_impl(label, "o", expressions, mappings, false)); + res.push_back(generate_impl(label, "f", expressions, mappings, true)); return res; } @@ -615,10 +615,10 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width return result; } - std::vector mproduct::infos(array_expressions_container const & array_expressions, + std::vector mproduct::infos(expressions_tuple const & expressions, lhs_rhs_element & C, lhs_rhs_element & A, lhs_rhs_element & B) { - atidlas::array_expression const & array_expression = (*array_expressions.data().front()); + atidlas::array_expression const & array_expression = (*expressions.data().front()); array_expression::container_type const & array = array_expression.tree(); std::size_t root = array_expression.root(); @@ -640,18 +640,18 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width mproduct::mproduct(mproduct_parameters const & parameters, char A_trans, char B_trans) : base_impl(parameters, BIND_ALL_UNIQUE), A_trans_(A_trans), B_trans_(B_trans) { } - std::vector mproduct::input_sizes(array_expressions_container const & array_expressions) + std::vector mproduct::input_sizes(expressions_tuple const & expressions) { lhs_rhs_element d0, d1, d2; - return infos(array_expressions, d0, d1, d2); + return infos(expressions, d0, d1, d2); } - void mproduct::enqueue(cl::CommandQueue & queue, std::vector & programs, unsigned int label, array_expressions_container const & array_expressions) + void mproduct::enqueue(cl::CommandQueue & queue, std::vector & programs, unsigned int label, expressions_tuple const & expressions) { using namespace tools; lhs_rhs_element C, A, B; - std::vector MNK = infos(array_expressions, C, A, B); + std::vector MNK = infos(expressions, C, A, B); int_t M = MNK[0]; int_t N = MNK[1]; diff --git a/lib/backend/templates/mreduction.cpp b/lib/backend/templates/mreduction.cpp index 5f62b72a6..9124d7539 100644 --- a/lib/backend/templates/mreduction.cpp +++ b/lib/backend/templates/mreduction.cpp @@ -14,7 +14,7 @@ mreduction_parameters::mreduction_parameters(unsigned int _simd_width, num_groups_0(_num_groups_0), fetch_policy(_fetch_policy) { } -int mreduction::check_invalid_impl(cl::Device const &, array_expressions_container const &) const +int mreduction::check_invalid_impl(cl::Device const &, expressions_tuple const &) const { if (p_.fetch_policy==FETCH_FROM_LOCAL) return TEMPLATE_INVALID_FETCHING_POLICY_TYPE; @@ -26,7 +26,7 @@ unsigned int mreduction::lmem_usage() const return p_.local_size_0*(p_.local_size_1+1); } -std::string mreduction::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector const & mappings, unsigned int simd_width, std::vector const & exprs) const +std::string mreduction::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector const & mappings, unsigned int simd_width, std::vector const & exprs) const { using tools::to_string; @@ -40,7 +40,7 @@ std::string mreduction::generate_impl(unsigned int label, array_expressions_cont fill_kernel_name(kprefix, label, "d"); stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; - stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, array_expressions) << ")" << std::endl; + stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, expressions) << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); @@ -48,7 +48,7 @@ std::string mreduction::generate_impl(unsigned int label, array_expressions_cont tools::make_map >("array0", "#scalartype #namereg = #pointer[#start];") ("array1", "#pointer += #start;") ("array2", "#pointer += #start1 + #start2*#ld; " - "#ld *= #nldstride; "), array_expressions, mappings); + "#ld *= #nldstride; "), expressions, mappings); for (std::vector::const_iterator it = exprs.begin(); it != exprs.end(); ++it) stream << (*it)->process("__local #scalartype #name_buf[" + to_string(lsize0*lsize1) + "];") << std::endl; @@ -160,7 +160,7 @@ std::string mreduction::generate_impl(unsigned int label, array_expressions_cont std::map accessors; accessors["mreduction"] = "#name_buf[lid0*" + lsize1str + "]"; accessors["array1"] = "#pointer[r*#stride]"; - evaluate(stream, PARENT_NODE_TYPE, accessors, array_expressions, mappings); + evaluate(stream, PARENT_NODE_TYPE, accessors, expressions, mappings); stream.dec_tab(); stream << "}" << std::endl; @@ -174,14 +174,14 @@ std::string mreduction::generate_impl(unsigned int label, array_expressions_cont return stream.str(); } -std::vector mreduction::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector const & mappings) const +std::vector mreduction::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector const & mappings) const { std::vector exprs; - array_expressions_container::data_type::const_iterator sit; + expressions_tuple::data_type::const_iterator sit; std::vector::const_iterator mit; - for (mit = mappings.begin(), sit = array_expressions.data().begin(); mit != mappings.end(); ++mit, ++sit) + for (mit = mappings.begin(), sit = expressions.data().begin(); mit != mappings.end(); ++mit, ++sit) { - array_expression const & first_expression = *array_expressions.data().front(); + array_expression const & first_expression = *expressions.data().front(); std::vector idx = filter_nodes(&is_reduction, first_expression, false); for (unsigned int j = 0; j < idx.size(); ++j) exprs.push_back((mapped_mreduction*)(mit->at(mapping_key(idx[j], PARENT_NODE_TYPE)).get())); @@ -190,11 +190,11 @@ std::vector mreduction::generate_impl(unsigned int label, array_exp std::vector res; if (reduction_type_ && p_.simd_width>1) { - res.push_back(generate_impl(label, array_expressions, mappings, p_.simd_width, exprs)); - res.push_back(generate_impl(label, array_expressions, mappings, 1, exprs)); + res.push_back(generate_impl(label, expressions, mappings, p_.simd_width, exprs)); + res.push_back(generate_impl(label, expressions, mappings, 1, exprs)); } else - res.push_back(generate_impl(label, array_expressions, mappings, 1, exprs)); + res.push_back(generate_impl(label, expressions, mappings, 1, exprs)); return res; } @@ -204,9 +204,9 @@ mreduction::mreduction(mreduction::parameters_type const & parameters, base_impl(parameters, binding_policy), reduction_type_(rtype){ } -std::vector mreduction::input_sizes(array_expressions_container const & array_expressions) +std::vector mreduction::input_sizes(expressions_tuple const & expressions) { - array_expression const & first_expression = *array_expressions.data().front(); + array_expression const & first_expression = *expressions.data().front(); std::vector idx = filter_nodes(&is_reduction, first_expression, false); std::pair MN = matrix_size(lhs_most(first_expression.tree(), idx[0])); if(reduction_type_==REDUCE_COLUMNS) @@ -217,15 +217,15 @@ std::vector mreduction::input_sizes(array_expressions_container const & a void mreduction::enqueue(cl::CommandQueue & queue, std::vector & programs, unsigned int label, - array_expressions_container const & array_expressions) + expressions_tuple const & expressions) { char kname[10]; fill_kernel_name(kname, label, "d"); - std::vector MN = input_sizes(array_expressions); + std::vector MN = input_sizes(expressions); //Kernel int idx = 0; - if(reduction_type_==REDUCE_COLUMNS && p_.simd_width>1 && requires_fallback(array_expressions)) + if(reduction_type_==REDUCE_COLUMNS && p_.simd_width>1 && requires_fallback(expressions)) idx = 1; cl::Program & program = programs[idx].program(); cl::Kernel kernel(program, kname); @@ -237,7 +237,7 @@ void mreduction::enqueue(cl::CommandQueue & queue, unsigned int current_arg = 0; kernel.setArg(current_arg++, cl_uint(MN[0])); kernel.setArg(current_arg++, cl_uint(MN[1])); - set_arguments(array_expressions, kernel, current_arg); + set_arguments(expressions, kernel, current_arg); queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange); } diff --git a/lib/backend/templates/reduction.cpp b/lib/backend/templates/reduction.cpp index 8d0299acb..6ebc1d677 100644 --- a/lib/backend/templates/reduction.cpp +++ b/lib/backend/templates/reduction.cpp @@ -13,10 +13,10 @@ reduction_parameters::reduction_parameters(unsigned int _simd_width, fetching_policy_type _fetching_policy) : base::parameters_type(_simd_width, _group_size, 1, 2), num_groups(_num_groups), fetching_policy(_fetching_policy) { } -unsigned int reduction::lmem_usage(array_expressions_container const & array_expressions) const +unsigned int reduction::lmem_usage(expressions_tuple const & expressions) const { unsigned int res = 0; - for(array_expressions_container::data_type::const_iterator it = array_expressions.data().begin() ; it != array_expressions.data().end() ; ++it) + for(expressions_tuple::data_type::const_iterator it = expressions.data().begin() ; it != expressions.data().end() ; ++it) { numeric_type numeric_t= lhs_most((*it)->tree(), (*it)->root()).lhs.dtype; res += p_.local_size_0*size_of(numeric_t); @@ -24,7 +24,7 @@ unsigned int reduction::lmem_usage(array_expressions_container const & array_exp return res; } -int reduction::check_invalid_impl(cl::Device const &, array_expressions_container const &) const +int reduction::check_invalid_impl(cl::Device const &, expressions_tuple const &) const { if (p_.fetching_policy==FETCH_FROM_LOCAL) return TEMPLATE_INVALID_FETCHING_POLICY_TYPE; @@ -56,7 +56,7 @@ inline void reduction::reduce_1d_local_memory(kernel_generation_stream & stream, stream << "}" << std::endl; } -std::string reduction::generate_impl(unsigned int label, const char * type, array_expressions_container const & array_expressions, std::vector const & mappings, unsigned int simd_width) const +std::string reduction::generate_impl(unsigned int label, const char * type, expressions_tuple const & expressions, std::vector const & mappings, unsigned int simd_width) const { kernel_generation_stream stream; @@ -89,13 +89,13 @@ std::string reduction::generate_impl(unsigned int label, const char * type, arra fill_kernel_name(kprefix, label, type); stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; - stream << "__kernel void " << kprefix << "0" << "(" << arguments << generate_arguments("#scalartype", mappings, array_expressions) << ")" << std::endl; + stream << "__kernel void " << kprefix << "0" << "(" << arguments << generate_arguments("#scalartype", mappings, expressions) << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); stream << "unsigned int lid = get_local_id(0);" << std::endl; process(stream, PARENT_NODE_TYPE, tools::make_map >("array0", "#scalartype #namereg = #pointer[#start];") - ("array1", "#pointer += #start;"), array_expressions, mappings); + ("array1", "#pointer += #start;"), expressions, mappings); for (unsigned int k = 0; k < N; ++k) { @@ -194,7 +194,7 @@ std::string reduction::generate_impl(unsigned int label, const char * type, arra * Second kernel * -----------------------*/ stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; - stream << "__kernel void " << kprefix << "1" << "(" << arguments << generate_arguments("#scalartype", mappings, array_expressions) << ")" << std::endl; + stream << "__kernel void " << kprefix << "1" << "(" << arguments << generate_arguments("#scalartype", mappings, expressions) << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); @@ -246,7 +246,7 @@ std::string reduction::generate_impl(unsigned int label, const char * type, arra std::map accessors; accessors["scalar_reduction"] = "#name_buf[0]"; accessors["array0"] = "#pointer[#start]"; - evaluate(stream, PARENT_NODE_TYPE, accessors, array_expressions, mappings); + evaluate(stream, PARENT_NODE_TYPE, accessors, expressions, mappings); stream.dec_tab(); stream << "}" << std::endl; @@ -256,11 +256,11 @@ std::string reduction::generate_impl(unsigned int label, const char * type, arra return stream.str(); } -std::vector reduction::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector const & mappings) const +std::vector reduction::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector const & mappings) const { std::vector result; - result.push_back(generate_impl(label, "f", array_expressions, mappings, 1)); - result.push_back(generate_impl(label, "o", array_expressions, mappings, p_.simd_width)); + result.push_back(generate_impl(label, "f", expressions, mappings, 1)); + result.push_back(generate_impl(label, "o", expressions, mappings, p_.simd_width)); return result; } @@ -273,22 +273,22 @@ reduction::reduction(unsigned int simd, unsigned int ls, unsigned int ng, base_impl(reduction_parameters(simd,ls,ng,fetch), bind) {} -std::vector reduction::input_sizes(array_expressions_container const & array_expressions) +std::vector reduction::input_sizes(expressions_tuple const & expressions) { - std::vector reductions_idx = filter_nodes(&is_reduction, *(array_expressions.data().front()), false); - int_t N = vector_size(lhs_most(array_expressions.data().front()->tree(), reductions_idx[0])); + std::vector reductions_idx = filter_nodes(&is_reduction, *(expressions.data().front()), false); + int_t N = vector_size(lhs_most(expressions.data().front()->tree(), reductions_idx[0])); return tools::make_vector() << N; } void reduction::enqueue(cl::CommandQueue & queue, std::vector & programs, unsigned int label, - array_expressions_container const & array_expressions) + expressions_tuple const & expressions) { //Preprocessing - int_t size = input_sizes(array_expressions)[0]; + int_t size = input_sizes(expressions)[0]; std::vector reductions; - for (array_expressions_container::data_type::const_iterator it = array_expressions.data().begin(); it != array_expressions.data().end(); ++it) + for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it) { std::vector reductions_idx = filter_nodes(&is_reduction, **it, false); for (std::vector::iterator itt = reductions_idx.begin(); itt != reductions_idx.end(); ++itt) @@ -303,7 +303,7 @@ void reduction::enqueue(cl::CommandQueue & queue, fill_kernel_name(kopt[0], label, "o0"); fill_kernel_name(kopt[1], label, "o1"); - bool fallback = p_.simd_width > 1 && (requires_fallback(array_expressions) || (size%p_.simd_width>0)); + bool fallback = p_.simd_width > 1 && (requires_fallback(expressions) || (size%p_.simd_width>0)); cl::Program & program = programs[fallback?0:1].program(); cl::Kernel kernels[2] = { cl::Kernel(program, fallback?kfallback[0]:kopt[0]), cl::Kernel(program, fallback?kfallback[1]:kopt[1]) }; @@ -313,8 +313,8 @@ void reduction::enqueue(cl::CommandQueue & queue, cl::NDRange lrange[2] = { cl::NDRange(p_.local_size_0), cl::NDRange(p_.local_size_0) }; //Arguments - cl::Context context = array_expressions.context(); - array_expression const & s = *(array_expressions.data().front()); + cl::Context context = expressions.context(); + array_expression const & s = *(expressions.data().front()); unsigned int dtype_size = size_of(lhs_most(s.tree(), s.root()).lhs.dtype); for (unsigned int k = 0; k < 2; k++) { @@ -338,7 +338,7 @@ void reduction::enqueue(cl::CommandQueue & queue, kernels[k].setArg(n_arg++, tmp_[i]); i++; } - set_arguments(array_expressions, kernels[k], n_arg); + set_arguments(expressions, kernels[k], n_arg); } for (unsigned int k = 0; k < 2; k++) diff --git a/lib/backend/templates/vaxpy.cpp b/lib/backend/templates/vaxpy.cpp index cba6bd84d..2fbb4047a 100644 --- a/lib/backend/templates/vaxpy.cpp +++ b/lib/backend/templates/vaxpy.cpp @@ -16,14 +16,14 @@ vaxpy_parameters::vaxpy_parameters(unsigned int _simd_width, { } -int vaxpy::check_invalid_impl(cl::Device const &, array_expressions_container const &) const +int vaxpy::check_invalid_impl(cl::Device const &, expressions_tuple const &) const { if (p_.fetching_policy==FETCH_FROM_LOCAL) return TEMPLATE_INVALID_FETCHING_POLICY_TYPE; return TEMPLATE_VALID; } -std::vector vaxpy::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector const & mappings) const +std::vector vaxpy::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector const & mappings) const { std::vector result; for (unsigned int i = 0; i < 2; ++i) @@ -36,14 +36,14 @@ std::vector vaxpy::generate_impl(unsigned int label, array_expressi stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; char kprefix[10]; fill_kernel_name(kprefix, label, (i==0?"f":"o")); - stream << "__kernel void " << kprefix << "(unsigned int N," << generate_arguments(data_type, mappings, array_expressions) << ")" << std::endl; + stream << "__kernel void " << kprefix << "(unsigned int N," << generate_arguments(data_type, mappings, expressions) << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); process(stream, PARENT_NODE_TYPE, tools::make_map >("array0", "#scalartype #namereg = #pointer[#start];") ("array1", "#pointer += #start;") - ("array1", "#start1/=" + str_simd_width + ";"), array_expressions, mappings); + ("array1", "#start1/=" + str_simd_width + ";"), expressions, mappings); std::string init, upper_bound, inc; fetching_loop_info(p_.fetching_policy, "N/"+str_simd_width, stream, init, upper_bound, inc, "get_global_id(0)", "get_global_size(0)"); @@ -55,7 +55,7 @@ std::vector vaxpy::generate_impl(unsigned int label, array_expressi ("matrix_row", "#scalartype #namereg = $VALUE{#row*#stride1, i*#stride2};") ("matrix_column", "#scalartype #namereg = $VALUE{i*#stride1,#column*#stride2};") ("matrix_diag", "#scalartype #namereg = #pointer[#diag_offset<0?$OFFSET{(i - #diag_offset)*#stride1, i*#stride2}:$OFFSET{i*#stride1, (i + #diag_offset)*#stride2}];") - , array_expressions, mappings); + , expressions, mappings); evaluate(stream, PARENT_NODE_TYPE, tools::make_map >("array1", "#namereg") ("matrix_row", "#namereg") @@ -63,13 +63,13 @@ std::vector vaxpy::generate_impl(unsigned int label, array_expressi ("matrix_diag", "#namereg") ("array0", "#namereg") ("cast", "convert_"+data_type) - , array_expressions, mappings); + , expressions, mappings); process(stream, LHS_NODE_TYPE, tools::make_map >("array1", "#pointer[i*#stride] = #namereg;") ("matrix_row", "$VALUE{#row, i} = #namereg;") ("matrix_column", "$VALUE{i, #column} = #namereg;") ("matrix_diag", "#diag_offset<0?$VALUE{(i - #diag_offset)*#stride1, i*#stride2}:$VALUE{i*#stride1, (i + #diag_offset)*#stride2} = #namereg;") - ,array_expressions, mappings); + ,expressions, mappings); stream.dec_tab(); stream << "}" << std::endl; @@ -77,7 +77,7 @@ std::vector vaxpy::generate_impl(unsigned int label, array_expressi stream << "if(get_global_id(0)==0)" << std::endl; stream << "{" << std::endl; stream.inc_tab(); - process(stream, LHS_NODE_TYPE, tools::make_map >("array0", "#pointer[#start] = #namereg;"), array_expressions, mappings); + process(stream, LHS_NODE_TYPE, tools::make_map >("array0", "#pointer[#start] = #namereg;"), expressions, mappings); stream.dec_tab(); stream << "}" << std::endl; @@ -102,25 +102,25 @@ vaxpy::vaxpy(unsigned int simd, unsigned int ls, unsigned int ng, {} -std::vector vaxpy::input_sizes(array_expressions_container const & array_expressions) +std::vector vaxpy::input_sizes(expressions_tuple const & expressions) { - int_t size = static_cast(array_expressions.data().front().get())->shape()._1; + int_t size = static_cast(expressions.data().front().get())->shape()._1; return tools::make_vector() << size; } void vaxpy::enqueue(cl::CommandQueue & queue, std::vector & programs, unsigned int label, - array_expressions_container const & array_expressions) + expressions_tuple const & expressions) { //Size - int_t size = input_sizes(array_expressions)[0]; + int_t size = input_sizes(expressions)[0]; //Kernel char kfb[10]; char kopt[10]; fill_kernel_name(kfb, label, "f"); fill_kernel_name(kopt, label, "o"); - bool fallback = p_.simd_width > 1 && (requires_fallback(array_expressions) || (size%p_.simd_width>0)); + bool fallback = p_.simd_width > 1 && (requires_fallback(expressions) || (size%p_.simd_width>0)); cl::Program const & program = programs[fallback?0:1].program(); cl_ext::kernels_t::key_type key(program(), label); @@ -135,7 +135,7 @@ void vaxpy::enqueue(cl::CommandQueue & queue, //Arguments unsigned int current_arg = 0; kernel.setArg(current_arg++, cl_uint(size)); - set_arguments(array_expressions, kernel, current_arg); + set_arguments(expressions, kernel, current_arg); queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange); queue.flush(); } diff --git a/lib/model/model.cpp b/lib/model/model.cpp index ab74be560..950602f00 100644 --- a/lib/model/model.cpp +++ b/lib/model/model.cpp @@ -28,9 +28,9 @@ std::string model::define_extension(std::string const & extensions, std::string return std::string(""); } -void model::fill_program_name(char* program_name, array_expressions_container const & array_expressions, binding_policy_t binding_policy) +void model::fill_program_name(char* program_name, expressions_tuple const & expressions, binding_policy_t binding_policy) { - if (array_expressions.order()==array_expressions_container::INDEPENDENT) + if (expressions.order()==expressions_tuple::INDEPENDENT) *program_name++='i'; else *program_name++='s'; @@ -39,31 +39,39 @@ void model::fill_program_name(char* program_name, array_expressions_container co binder = new bind_to_handle(); else binder = new bind_all_unique(); - for (array_expressions_container::data_type::const_iterator it = array_expressions.data().begin(); it != array_expressions.data().end(); ++it) + for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it) traverse(**it, (*it)->root(), array_expression_representation_functor(*binder, program_name),true); *program_name='\0'; delete binder; } -std::vector& model::init(array_expressions_container const & array_expressions, cl::Context const & context, cl::Device const & device, bool force_recompilation) +std::vector& model::init(expressions_tuple const & expressions, runtime_options const & opt) { - char program_name[256]; - fill_program_name(program_name, array_expressions, BIND_TO_HANDLE); - std::string pname(program_name); + cl::Context const & context = expressions.context(); + std::string pname; + if(opt.program_name.empty()) + { + char program_name[256]; + fill_program_name(program_name, expressions, BIND_TO_HANDLE); + pname = std::string(program_name); + } + else + pname = opt.program_name; std::vector & to_init = lazy_programs_[context()][pname]; if(to_init.empty()) { + cl::Device device = queue_.getInfo(); std::string extensions = device.getInfo(); - to_init.push_back(cl_ext::lazy_compiler(context, pname, force_recompilation)); + to_init.push_back(cl_ext::lazy_compiler(context, pname, opt.recompile)); to_init.back().add(define_extension(extensions, "cl_khr_fp64")); - to_init.push_back(cl_ext::lazy_compiler(context, pname + "_fb", force_recompilation)); + to_init.push_back(cl_ext::lazy_compiler(context, pname + "_fb", opt.recompile)); to_init.back().add(define_extension(extensions, "cl_khr_fp64")); for(size_t i = 0 ; i < templates_.size() ; ++i) { - std::vector cur = templates_[i]->generate(i, array_expressions, device); + std::vector cur = templates_[i]->generate(i, expressions, device); for(size_t j = 0 ; j < cur.size() ; ++j){ to_init[j].add(cur[j]); } @@ -82,42 +90,37 @@ model::model(std::vector< tools::shared_ptr > const & templates, cl::Comma model::model(base const & tp, cl::CommandQueue & queue) : templates_(1,tp.clone()), queue_(queue) {} -void model::execute(array_expressions_container const & array_expressions, bool bypass_predictor, bool force_recompilation) +void model::execute(expressions_tuple const & expressions, runtime_options const & opt) { - bypass_predictor = bypass_predictor || predictor_.get()==NULL; - cl::Context const & context = array_expressions.context(); - assert(context() == queue_.getInfo()()); - cl::Device const & device = queue_.getInfo(); - - std::vector & compilers = init(array_expressions, context, device, force_recompilation); + std::vector & compilers = init(expressions, opt); //Prediction - std::vector x = templates_[0]->input_sizes(array_expressions); - int label; - //The user tuned the model specifically for this input size - if(hardcoded_.find(x)!=hardcoded_.end()) - label = hardcoded_.at(x); - //The user bypasses the random forest - else if(bypass_predictor) - label = 0; - //Default + int label = 0; + if(opt.label>=0) + { + label = opt.label; + } else { - std::vector predictions = predictor_->predict(x); - label = std::distance(predictions.begin(),std::min_element(predictions.begin(), predictions.end())); + std::vector x = templates_[0]->input_sizes(expressions); + //The user tuned the model specifically for this input size + if(hardcoded_.find(x)!=hardcoded_.end()) + label = hardcoded_.at(x); + //The user bypasses the random forest + else if(predictor_.get()) + { + std::vector predictions = predictor_->predict(x); + label = std::distance(predictions.begin(),std::min_element(predictions.begin(), predictions.end())); + } } //Execution - templates_[label]->enqueue(queue_, compilers, label, array_expressions); + templates_[label]->enqueue(queue_, compilers, label, expressions); } -void model::tune(array_expressions_container const & array_expressions) +void model::tune(expressions_tuple const & expressions) { - cl::Context const & context = array_expressions.context(); - assert(context() == queue_.getInfo()()); - cl::Device device = queue_.getInfo(); - - std::vector & compilers = init(array_expressions, context, device, false); + std::vector & compilers = init(expressions); //Collect the timings std::vector timings(templates_.size()); @@ -125,13 +128,13 @@ void model::tune(array_expressions_container const & array_expressions) for(size_t i = 0 ; i < templates_.size() ; ++i) { timer.start(); - templates_[i]->enqueue(queue_, compilers, i, array_expressions); + templates_[i]->enqueue(queue_, compilers, i, expressions); queue_.finish(); timings[i] = timer.get(); } //Fill the override - std::vector x = templates_[0]->input_sizes(array_expressions); + std::vector x = templates_[0]->input_sizes(expressions); hardcoded_[x] = std::distance(timings.begin(),std::min_element(timings.begin(), timings.end())); } diff --git a/lib/model/predictors/random_forest.cpp b/lib/model/predictors/random_forest.cpp index 336217340..fbdf0fb32 100644 --- a/lib/model/predictors/random_forest.cpp +++ b/lib/model/predictors/random_forest.cpp @@ -32,19 +32,19 @@ random_forest::random_forest(rapidjson::Value const & estimators) { for(rapidjson::SizeType i = 0 ; i < estimators.Size() ; ++i) estimators_.push_back(tree(estimators[i])); + D_ = estimators_.front().D(); } std::vector random_forest::predict(std::vector const & x) const { - int_t D = estimators_.front().D(); - std::vector res(D, 0); + std::vector res(D_, 0); for(std::vector::const_iterator it = estimators_.begin() ; it != estimators_.end() ; ++it) { std::vector const & subres = it->predict(x); - for(int_t i = 0 ; i < D ; ++i) + for(int_t i = 0 ; i < D_ ; ++i) res[i] += subres[i]; } - for(int_t i = 0 ; i < D ; ++i) + for(int_t i = 0 ; i < D_ ; ++i) res[i] /= estimators_.size(); return res; } diff --git a/lib/symbolic/expression.cpp b/lib/symbolic/expression.cpp index d0d8aafce..ec7ffc509 100644 --- a/lib/symbolic/expression.cpp +++ b/lib/symbolic/expression.cpp @@ -176,32 +176,32 @@ array_expression array_expression::operator!() // -tools::shared_ptr array_expressions_container::create(array_expression const & s) +tools::shared_ptr expressions_tuple::create(array_expression const & s) { return tools::shared_ptr(new array_expression(static_cast(s))); } -array_expressions_container::array_expressions_container(data_type const & data, order_type order) : data_(data), order_(order) +expressions_tuple::expressions_tuple(data_type const & data, order_type order) : data_(data), order_(order) { } -array_expressions_container::array_expressions_container(array_expression const & s0) : order_(INDEPENDENT) +expressions_tuple::expressions_tuple(array_expression const & s0) : order_(INDEPENDENT) { data_.push_back(create(s0)); } -array_expressions_container::array_expressions_container(order_type order, array_expression const & s0, array_expression const & s1) : order_(order) +expressions_tuple::expressions_tuple(order_type order, array_expression const & s0, array_expression const & s1) : order_(order) { data_.push_back(create(s0)); data_.push_back(create(s1)); } -array_expressions_container::data_type const & array_expressions_container::data() const +expressions_tuple::data_type const & expressions_tuple::data() const { return data_; } -cl::Context const & array_expressions_container::context() const +cl::Context const & expressions_tuple::context() const { return data_.front()->context(); } -array_expressions_container::order_type array_expressions_container::order() const +expressions_tuple::order_type expressions_tuple::order() const { return order_; } array_expression::node const & lhs_most(array_expression::container_type const & array, array_expression::node const & init) diff --git a/python/pyatidlas/src/_atidlas.cpp b/python/pyatidlas/src/_atidlas.cpp index 47bea9bbc..ef01cd340 100644 --- a/python/pyatidlas/src/_atidlas.cpp +++ b/python/pyatidlas/src/_atidlas.cpp @@ -477,7 +477,7 @@ void export_array() .def(bp::self OP bp::self)\ ADD_SCALAR_HANDLING(OP) - bp::class_ + bp::class_ ("array_expression_container", bp::init()) ;