Some renaming; lower overhead in benchmark

This commit is contained in:
Philippe Tillet
2015-02-01 22:28:49 -05:00
parent f0bb130416
commit 535706f35a
23 changed files with 239 additions and 213 deletions

View File

@@ -33,7 +33,7 @@ void bench(ad::numeric_type dtype)
total_time = 0;\ total_time = 0;\
OP;\ OP;\
ad::cl_ext::synchronize(ad::cl_ext::default_context());\ ad::cl_ext::synchronize(ad::cl_ext::default_context());\
while(total_time < 1e-1){\ while(total_time < 5e-1){\
timer.start(); \ timer.start(); \
OP;\ OP;\
SYNC;\ SYNC;\
@@ -41,7 +41,7 @@ void bench(ad::numeric_type dtype)
total_time += times.back();\ total_time += times.back();\
}\ }\
float tres = median(times);\ float tres = median(times);\
std::cout << " " << tres << std::flush;\ std::cout << " " << PERF << std::flush;\
} }
#define CL_BENCHMARK(OP, PERF) BENCHMARK(OP, PERF, ad::cl_ext::synchronize(ad::cl_ext::default_context())) #define CL_BENCHMARK(OP, PERF) BENCHMARK(OP, PERF, ad::cl_ext::synchronize(ad::cl_ext::default_context()))
@@ -64,7 +64,9 @@ void bench(ad::numeric_type dtype)
ad::array x(N, dtype), y(N, dtype); ad::array x(N, dtype), y(N, dtype);
ad::array_expression E = ad::detail::assign(y, x + y); ad::array_expression E = ad::detail::assign(y, x + y);
ad::model & model = ad::get_model(ad::cl_ext::get_queue(x.context(), 0), ad::VECTOR_AXPY_TYPE, dtype); ad::model & model = ad::get_model(ad::cl_ext::get_queue(x.context(), 0), ad::VECTOR_AXPY_TYPE, dtype);
CL_BENCHMARK(model.execute(E), bandwidth(3*N, tres, dtsize)); ad::model::runtime_options opt("saxpy");
model.tune(E);
CL_BENCHMARK(model.execute(E, opt), bandwidth(3*N, tres, dtsize));
/* clAmdBlas */ /* clAmdBlas */
#ifdef BENCH_CLAMDBLAS #ifdef BENCH_CLAMDBLAS
CL_BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &ad::cl_ext::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize)) CL_BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &ad::cl_ext::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize))

View File

@@ -73,4 +73,14 @@ T median(std::vector<T> x)
return x[size / 2]; return x[size / 2];
} }
/**
 * @brief Arithmetic mean of the elements of a vector
 *
 * Accumulates the elements in index order into a value of type T and divides
 * the sum by the element count. The division is carried out exactly as
 * `T / int`, so for integral T the result is truncated.
 *
 * @param x  values to average (taken by value; not modified)
 * @return   the sum of the elements divided by their count, or T() when x is empty
 */
template<class T>
T mean(std::vector<T> x)
{
  // Make the size_t -> int narrowing explicit instead of implicit.
  const int N = static_cast<int>(x.size());
  // Guard the division below: with no elements it would divide by zero
  // (undefined behaviour for integral T, NaN for floating-point T).
  if(N == 0)
    return T();
  T res = 0;
  for(int i = 0 ; i < N ; ++i)
    res += x[i];
  return res/N;
}
#endif #endif

View File

@@ -113,7 +113,7 @@ std::string evaluate(leaf_t leaf, std::map<std::string, std::string> const & acc
atidlas::array_expression const & array_expression, int_t root_idx, mapping_type const & mapping); atidlas::array_expression const & array_expression, int_t root_idx, mapping_type const & mapping);
void evaluate(kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors, void evaluate(kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors,
array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings); expressions_tuple const & expressions, std::vector<mapping_type> const & mappings);
/** @brief functor for fetching or writing-back the elements in a array_expression */ /** @brief functor for fetching or writing-back the elements in a array_expression */
class process_traversal : public traversal_functor class process_traversal : public traversal_functor
@@ -133,7 +133,7 @@ void process(kernel_generation_stream & stream, leaf_t leaf, std::map<std::strin
atidlas::array_expression const & array_expression, size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed); atidlas::array_expression const & array_expression, size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed);
void process(kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors, void process(kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors,
array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings); expressions_tuple const & expressions, std::vector<mapping_type> const & mappings);
class array_expression_representation_functor : public traversal_functor{ class array_expression_representation_functor : public traversal_functor{

View File

@@ -69,7 +69,7 @@ public:
protected: protected:
/** @brief Functor to map the array_expressions to the types defined in mapped_objects.hpp */ /** @brief Functor to map the expressions to the types defined in mapped_objects.hpp */
class map_functor : public traversal_functor class map_functor : public traversal_functor
{ {
/** @brief Accessor for the numeric type */ /** @brief Accessor for the numeric type */
@@ -130,9 +130,9 @@ protected:
size_t root_idx, leaf_t leaf); size_t root_idx, leaf_t leaf);
static std::string neutral_element(op_element const & op); static std::string neutral_element(op_element const & op);
static std::string generate_arguments(std::vector<mapping_type> const & mappings, std::map<std::string, std::string> const & accessors, static std::string generate_arguments(std::vector<mapping_type> const & mappings, std::map<std::string, std::string> const & accessors,
array_expressions_container const & array_expressions); expressions_tuple const & expressions);
static std::string generate_arguments(std::string const & data_type, std::vector<mapping_type> const & mappings, static std::string generate_arguments(std::string const & data_type, std::vector<mapping_type> const & mappings,
array_expressions_container const & array_expressions); expressions_tuple const & expressions);
static void fill_kernel_name(char * ptr, unsigned int label, const char * suffix); static void fill_kernel_name(char * ptr, unsigned int label, const char * suffix);
static bool is_node_trans(array_expression::container_type const & array, size_t root_idx, leaf_t leaf_type); static bool is_node_trans(array_expression::container_type const & array, size_t root_idx, leaf_t leaf_type);
static std::string append_simd_suffix(std::string const & str, unsigned int i); static std::string append_simd_suffix(std::string const & str, unsigned int i);
@@ -147,24 +147,24 @@ protected:
static std::string vstore(unsigned int simd_width, std::string const & value, std::string const & offset, std::string const & ptr); static std::string vstore(unsigned int simd_width, std::string const & value, std::string const & offset, std::string const & ptr);
static std::string vload(unsigned int simd_width, std::string const & offset, std::string const & ptr); static std::string vload(unsigned int simd_width, std::string const & offset, std::string const & ptr);
static std::string append_width(std::string const & str, unsigned int width); static std::string append_width(std::string const & str, unsigned int width);
static bool requires_fallback(array_expressions_container const & array_expressions); static bool requires_fallback(expressions_tuple const & expressions);
void set_arguments(array_expressions_container const & array_expressions, cl::Kernel & kernel, unsigned int & current_arg); void set_arguments(expressions_tuple const & expressions, cl::Kernel & kernel, unsigned int & current_arg);
private: private:
virtual std::vector<std::string> generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mapping) const = 0; virtual std::vector<std::string> generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mapping) const = 0;
public: public:
base(binding_policy_t binding_policy); base(binding_policy_t binding_policy);
virtual unsigned int lmem_usage(array_expressions_container const &) const; virtual unsigned int lmem_usage(expressions_tuple const &) const;
virtual unsigned int registers_usage(array_expressions_container const &) const; virtual unsigned int registers_usage(expressions_tuple const &) const;
virtual std::vector<int_t> input_sizes(array_expressions_container const & array_expressions) = 0; virtual std::vector<int_t> input_sizes(expressions_tuple const & expressions) = 0;
virtual ~base(); virtual ~base();
std::vector<std::string> generate(unsigned int label, array_expressions_container const & array_expressions, cl::Device const & device); std::vector<std::string> generate(unsigned int label, expressions_tuple const & expressions, cl::Device const & device);
virtual int check_invalid(array_expressions_container const & array_expressions, cl::Device const & device) const = 0; virtual int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const = 0;
virtual void enqueue(cl::CommandQueue & queue, virtual void enqueue(cl::CommandQueue & queue,
std::vector<cl_ext::lazy_compiler> & programs, std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label, array_expressions_container const & array_expressions) = 0; unsigned int label, expressions_tuple const & expressions) = 0;
virtual tools::shared_ptr<base> clone() const = 0; virtual tools::shared_ptr<base> clone() const = 0;
private: private:
binding_policy_t binding_policy_; binding_policy_t binding_policy_;
@@ -175,7 +175,7 @@ template<class TemplateType, class ParametersType>
class base_impl : public base class base_impl : public base
{ {
private: private:
virtual int check_invalid_impl(cl::Device const &, array_expressions_container const &) const; virtual int check_invalid_impl(cl::Device const &, expressions_tuple const &) const;
public: public:
typedef ParametersType parameters_type; typedef ParametersType parameters_type;
base_impl(parameters_type const & parameters, binding_policy_t binding_policy); base_impl(parameters_type const & parameters, binding_policy_t binding_policy);
@@ -183,7 +183,7 @@ public:
int_t local_size_1() const; int_t local_size_1() const;
tools::shared_ptr<base> clone() const; tools::shared_ptr<base> clone() const;
/** @brief returns whether or not the profile has undefined behavior on particular device */ /** @brief returns whether or not the profile has undefined behavior on particular device */
int check_invalid(array_expressions_container const & array_expressions, cl::Device const & device) const; int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const;
protected: protected:
parameters_type p_; parameters_type p_;
binding_policy_t binding_policy_; binding_policy_t binding_policy_;

View File

@@ -20,14 +20,14 @@ public:
class maxpy : public base_impl<maxpy, maxpy_parameters> class maxpy : public base_impl<maxpy, maxpy_parameters>
{ {
private: private:
int check_invalid_impl(cl::Device const &, array_expressions_container const &) const; int check_invalid_impl(cl::Device const &, expressions_tuple const &) const;
std::string generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const; std::string generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const;
std::vector<std::string> generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const; std::vector<std::string> generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const;
public: public:
maxpy(parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE); maxpy(parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE); maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE);
std::vector<int_t> input_sizes(array_expressions_container const & array_expressions); std::vector<int_t> input_sizes(expressions_tuple const & expressions);
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, array_expressions_container const & array_expressions); void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, expressions_tuple const & expressions);
}; };
} }

View File

@@ -33,25 +33,25 @@ struct mproduct_parameters : public base::parameters_type
class mproduct : public base_impl<mproduct, mproduct_parameters> class mproduct : public base_impl<mproduct, mproduct_parameters>
{ {
private: private:
unsigned int lmem_usage(array_expressions_container const & array_expressions) const; unsigned int lmem_usage(expressions_tuple const & expressions) const;
unsigned int registers_usage(array_expressions_container const & array_expressions) const; unsigned int registers_usage(expressions_tuple const & expressions) const;
int check_invalid_impl(cl::Device const &, array_expressions_container const &) const; int check_invalid_impl(cl::Device const &, expressions_tuple const &) const;
std::string generate_impl(unsigned int label, const char * id, const array_expressions_container &array_expressions, const std::vector<mapping_type> &, bool fallback) const; std::string generate_impl(unsigned int label, const char * id, const expressions_tuple &expressions, const std::vector<mapping_type> &, bool fallback) const;
std::vector<std::string> generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const; std::vector<std::string> generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const;
void enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K, void enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K,
array_infos const & A, array_infos const & B, array_infos const & C, array_infos const & A, array_infos const & B, array_infos const & C,
value_scalar const & alpha, value_scalar const & beta, value_scalar const & alpha, value_scalar const & beta,
std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, int id); std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, int id);
array_infos create_slice(array_infos & M, int_t s0_0, int_t s0_1, int_t s1_0, int_t s1_1, bool swap); array_infos create_slice(array_infos & M, int_t s0_0, int_t s0_1, int_t s1_0, int_t s1_1, bool swap);
std::vector<int_t> infos(array_expressions_container const & array_expressions, std::vector<int_t> infos(expressions_tuple const & expressions,
lhs_rhs_element & C, lhs_rhs_element & A, lhs_rhs_element & B); lhs_rhs_element & C, lhs_rhs_element & A, lhs_rhs_element & B);
public: public:
mproduct(mproduct::parameters_type const & parameters, char A_trans, char B_trans); mproduct(mproduct::parameters_type const & parameters, char A_trans, char B_trans);
std::vector<int_t> input_sizes(array_expressions_container const & array_expressions); std::vector<int_t> input_sizes(expressions_tuple const & expressions);
void enqueue(cl::CommandQueue & queue, void enqueue(cl::CommandQueue & queue,
std::vector<cl_ext::lazy_compiler> & programs, std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label, unsigned int label,
array_expressions_container const & array_expressions); expressions_tuple const & expressions);
private: private:
const char A_trans_; const char A_trans_;

View File

@@ -29,13 +29,13 @@ protected:
}; };
mreduction(mreduction::parameters_type const & , reduction_type, binding_policy_t); mreduction(mreduction::parameters_type const & , reduction_type, binding_policy_t);
private: private:
virtual int check_invalid_impl(cl::Device const &, array_expressions_container const &) const; virtual int check_invalid_impl(cl::Device const &, expressions_tuple const &) const;
unsigned int lmem_usage() const; unsigned int lmem_usage() const;
std::string generate_impl(unsigned int, array_expressions_container const &, std::vector<mapping_type> const &, unsigned int, std::vector<mapped_mreduction*> const &) const; std::string generate_impl(unsigned int, expressions_tuple const &, std::vector<mapping_type> const &, unsigned int, std::vector<mapped_mreduction*> const &) const;
std::vector<std::string> generate_impl(unsigned int, array_expressions_container const &, std::vector<mapping_type> const &) const; std::vector<std::string> generate_impl(unsigned int, expressions_tuple const &, std::vector<mapping_type> const &) const;
public: public:
virtual std::vector<int_t> input_sizes(array_expressions_container const & array_expressions); virtual std::vector<int_t> input_sizes(expressions_tuple const & expressions);
void enqueue(cl::CommandQueue & queue,std::vector<cl_ext::lazy_compiler> & programs,unsigned int label, array_expressions_container const & array_expressions); void enqueue(cl::CommandQueue & queue,std::vector<cl_ext::lazy_compiler> & programs,unsigned int label, expressions_tuple const & expressions);
private: private:
reduction_type reduction_type_; reduction_type reduction_type_;
}; };

View File

@@ -18,21 +18,21 @@ struct reduction_parameters : public base::parameters_type
class reduction : public base_impl<reduction, reduction_parameters> class reduction : public base_impl<reduction, reduction_parameters>
{ {
private: private:
unsigned int lmem_usage(array_expressions_container const & array_expressions) const; unsigned int lmem_usage(expressions_tuple const & expressions) const;
int check_invalid_impl(cl::Device const &, array_expressions_container const &) const; int check_invalid_impl(cl::Device const &, expressions_tuple const &) const;
inline void reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_scalar_reduction*> exprs, inline void reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_scalar_reduction*> exprs,
std::string const & buf_str, std::string const & buf_value_str) const; std::string const & buf_str, std::string const & buf_value_str) const;
std::string generate_impl(unsigned int label, const char * type, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const; std::string generate_impl(unsigned int label, const char * type, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const;
std::vector<std::string> generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const; std::vector<std::string> generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const;
public: public:
reduction(reduction::parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE); reduction(reduction::parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
reduction(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE); reduction(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE);
std::vector<int_t> input_sizes(array_expressions_container const & array_expressions); std::vector<int_t> input_sizes(expressions_tuple const & expressions);
void enqueue(cl::CommandQueue & queue, void enqueue(cl::CommandQueue & queue,
std::vector<cl_ext::lazy_compiler> & programs, std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label, unsigned int label,
array_expressions_container const & array_expressions); expressions_tuple const & expressions);
private: private:
std::vector< cl::Buffer > tmp_; std::vector< cl::Buffer > tmp_;
std::vector< cl::Buffer > tmpidx_; std::vector< cl::Buffer > tmpidx_;

View File

@@ -17,14 +17,14 @@ public:
class vaxpy : public base_impl<vaxpy, vaxpy_parameters> class vaxpy : public base_impl<vaxpy, vaxpy_parameters>
{ {
private: private:
virtual int check_invalid_impl(cl::Device const &, array_expressions_container const &) const; virtual int check_invalid_impl(cl::Device const &, expressions_tuple const &) const;
std::vector<std::string> generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const; std::vector<std::string> generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const;
public: public:
vaxpy(vaxpy::parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE); vaxpy(vaxpy::parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
vaxpy(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, binding_policy_t binding_policy = BIND_ALL_UNIQUE); vaxpy(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
std::vector<int_t> input_sizes(array_expressions_container const & array_expressions); std::vector<int_t> input_sizes(expressions_tuple const & expressions);
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label, array_expressions_container const & array_expressions); unsigned int label, expressions_tuple const & expressions);
}; };
} }

View File

@@ -17,19 +17,29 @@ namespace atidlas
class model class model
{ {
typedef std::vector< tools::shared_ptr<base> > templates_container; typedef std::vector< tools::shared_ptr<base> > templates_container;
public:
/** @brief Bundle of per-call options taken by model::init() and model::execute() */
struct runtime_options
{
  /** @brief Empty program name, label set to -1, recompile set to false */
  runtime_options()
    : program_name(), label(-1), recompile(false)
  { }

  /** @brief Same defaults as above, but with program_name initialised from p */
  runtime_options(std::string const & p)
    : program_name(p), label(-1), recompile(false)
  { }

  /** @brief Program name supplied by the caller (empty when default-constructed) */
  std::string program_name;
  /** @brief Defaults to -1 */
  int label;
  /** @brief Defaults to false */
  bool recompile;
};
private: private:
std::string define_extension(std::string const & extensions, std::string const & ext); std::string define_extension(std::string const & extensions, std::string const & ext);
inline void fill_program_name(char* program_name, array_expressions_container const & array_expressions, binding_policy_t binding_policy); inline void fill_program_name(char* program_name, expressions_tuple const & expressions, binding_policy_t binding_policy);
std::vector<cl_ext::lazy_compiler>& init(array_expressions_container const & array_expressions, cl::Context const & context, cl::Device const & device, bool force_recompilation); std::vector<cl_ext::lazy_compiler>& init(expressions_tuple const & expressions, runtime_options const & opt = runtime_options());
public: public:
model(predictors::random_forest const &, std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &); model(predictors::random_forest const &, std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &);
model(std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &); model(std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &);
model(base const &, cl::CommandQueue &); model(base const &, cl::CommandQueue &);
void execute(array_expressions_container const &, bool bypass_predictor = false, bool force_recompilation = false); void execute(expressions_tuple const &, runtime_options const & opt = runtime_options());
void tune(array_expressions_container const &); void tune(expressions_tuple const &);
templates_container const & templates() const; templates_container const & templates() const;
private: private:

View File

@@ -38,6 +38,7 @@ public:
std::vector<tree> const & estimators() const; std::vector<tree> const & estimators() const;
private: private:
std::vector<tree> estimators_; std::vector<tree> estimators_;
int_t D_;
}; };
} }

View File

@@ -208,7 +208,7 @@ private:
size4 shape_; size4 shape_;
}; };
class array_expressions_container class expressions_tuple
{ {
private: private:
tools::shared_ptr<array_expression> create(array_expression const & s); tools::shared_ptr<array_expression> create(array_expression const & s);
@@ -216,9 +216,9 @@ public:
typedef std::list<tools::shared_ptr<array_expression> > data_type; typedef std::list<tools::shared_ptr<array_expression> > data_type;
enum order_type { SEQUENTIAL, INDEPENDENT }; enum order_type { SEQUENTIAL, INDEPENDENT };
array_expressions_container(array_expression const & s0); expressions_tuple(array_expression const & s0);
array_expressions_container(order_type order, array_expression const & s0, array_expression const & s1); expressions_tuple(order_type order, array_expression const & s0, array_expression const & s1);
array_expressions_container(data_type const & data, order_type order); expressions_tuple(data_type const & data, order_type order);
data_type const & data() const; data_type const & data() const;
cl::Context const & context() const; cl::Context const & context() const;

View File

@@ -303,12 +303,12 @@ std::string evaluate(leaf_t leaf, std::map<std::string, std::string> const & acc
} }
void evaluate(kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors, void evaluate(kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors,
array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) expressions_tuple const & expressions, std::vector<mapping_type> const & mappings)
{ {
array_expressions_container::data_type::const_iterator sit; expressions_tuple::data_type::const_iterator sit;
std::vector<mapping_type>::const_iterator mit; std::vector<mapping_type>::const_iterator mit;
for (mit = mappings.begin(), sit = array_expressions.data().begin(); sit != array_expressions.data().end(); ++mit, ++sit) for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++mit, ++sit)
stream << evaluate(leaf, accessors, **sit, (*sit)->root(), *mit) << ";" << std::endl; stream << evaluate(leaf, accessors, **sit, (*sit)->root(), *mit) << ";" << std::endl;
} }
@@ -368,13 +368,13 @@ void process(kernel_generation_stream & stream, leaf_t leaf, std::map<std::strin
} }
void process(kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors, void process(kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors,
array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) expressions_tuple const & expressions, std::vector<mapping_type> const & mappings)
{ {
array_expressions_container::data_type::const_iterator sit; expressions_tuple::data_type::const_iterator sit;
std::vector<mapping_type>::const_iterator mit; std::vector<mapping_type>::const_iterator mit;
std::set<std::string> already_processed; std::set<std::string> already_processed;
for (mit = mappings.begin(), sit = array_expressions.data().begin(); sit != array_expressions.data().end(); ++mit, ++sit) for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++mit, ++sit)
process(stream, leaf, accessors, **sit, (*sit)->root(), *mit, already_processed); process(stream, leaf, accessors, **sit, (*sit)->root(), *mit, already_processed);
} }

View File

@@ -258,30 +258,30 @@ std::string base::neutral_element(op_element const & op)
} }
} }
std::string base::generate_arguments(std::vector<mapping_type> const & mappings, std::map<std::string, std::string> const & accessors, array_expressions_container const & array_expressions) std::string base::generate_arguments(std::vector<mapping_type> const & mappings, std::map<std::string, std::string> const & accessors, expressions_tuple const & expressions)
{ {
kernel_generation_stream stream; kernel_generation_stream stream;
process(stream, PARENT_NODE_TYPE, accessors, array_expressions, mappings); process(stream, PARENT_NODE_TYPE, accessors, expressions, mappings);
std::string res = stream.str(); std::string res = stream.str();
res.erase(res.rfind(',')); res.erase(res.rfind(','));
return res; return res;
} }
std::string base::generate_arguments(std::string const & data_type, std::vector<mapping_type> const & mappings, array_expressions_container const & array_expressions) std::string base::generate_arguments(std::string const & data_type, std::vector<mapping_type> const & mappings, expressions_tuple const & expressions)
{ {
return generate_arguments(mappings, tools::make_map<std::map<std::string, std::string> >("array0", "__global #scalartype* #pointer, uint #start,") return generate_arguments(mappings, tools::make_map<std::map<std::string, std::string> >("array0", "__global #scalartype* #pointer, uint #start,")
("host_scalar", "#scalartype #name,") ("host_scalar", "#scalartype #name,")
("array1", "__global " + data_type + "* #pointer, uint #start, uint #stride,") ("array1", "__global " + data_type + "* #pointer, uint #start, uint #stride,")
("array2", "__global " + data_type + "* #pointer, uint #ld, uint #start1, uint #start2, uint #stride1, uint #stride2,") ("array2", "__global " + data_type + "* #pointer, uint #ld, uint #start1, uint #start2, uint #stride1, uint #stride2,")
("tuple4", "#scalartype #name0, #scalartype #name1, #scalartype #name2, #scalartype #name3,"), array_expressions); ("tuple4", "#scalartype #name0, #scalartype #name1, #scalartype #name2, #scalartype #name3,"), expressions);
} }
void base::set_arguments(array_expressions_container const & array_expressions, cl::Kernel & kernel, unsigned int & current_arg) void base::set_arguments(expressions_tuple const & expressions, cl::Kernel & kernel, unsigned int & current_arg)
{ {
tools::shared_ptr<symbolic_binder> binder = make_binder(); tools::shared_ptr<symbolic_binder> binder = make_binder();
for (array_expressions_container::data_type::const_iterator itt = array_expressions.data().begin(); itt != array_expressions.data().end(); ++itt) for (expressions_tuple::data_type::const_iterator itt = expressions.data().begin(); itt != expressions.data().end(); ++itt)
traverse(**itt, (*itt)->root(), set_arguments_functor(*binder, current_arg, kernel), true); traverse(**itt, (*itt)->root(), set_arguments_functor(*binder, current_arg, kernel), true);
} }
@@ -370,9 +370,9 @@ bool base::is_strided(array_expression::node const & node)
|| node.op.type==OPERATOR_OUTER_PROD_TYPE; || node.op.type==OPERATOR_OUTER_PROD_TYPE;
} }
bool base::requires_fallback(array_expressions_container const & array_expressions) bool base::requires_fallback(expressions_tuple const & expressions)
{ {
for (array_expressions_container::data_type::const_iterator it = array_expressions.data().begin(); it != array_expressions.data().end(); ++it) for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it)
for(array_expression::container_type::const_iterator itt = (*it)->tree().begin(); itt != (*it)->tree().end() ; ++itt) for(array_expression::container_type::const_iterator itt = (*it)->tree().begin(); itt != (*it)->tree().end() ; ++itt)
if( (itt->lhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->lhs.array.stride1, itt->lhs.array.stride2)>1 || std::max(itt->lhs.array.start1,itt->lhs.array.start2)>0)) if( (itt->lhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->lhs.array.stride1, itt->lhs.array.stride2)>1 || std::max(itt->lhs.array.start1,itt->lhs.array.start2)>0))
|| (itt->rhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->rhs.array.stride1, itt->rhs.array.stride2)>1 || std::max(itt->rhs.array.start1,itt->rhs.array.start2)>0))) || (itt->rhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->rhs.array.stride1, itt->rhs.array.stride2)>1 || std::max(itt->rhs.array.start1,itt->rhs.array.start2)>0)))
@@ -490,34 +490,34 @@ tools::shared_ptr<symbolic_binder> base::make_binder()
base::base(binding_policy_t binding_policy) : binding_policy_(binding_policy) base::base(binding_policy_t binding_policy) : binding_policy_(binding_policy)
{} {}
unsigned int base::lmem_usage(array_expressions_container const &) const unsigned int base::lmem_usage(expressions_tuple const &) const
{ return 0; } { return 0; }
unsigned int base::registers_usage(array_expressions_container const &) const unsigned int base::registers_usage(expressions_tuple const &) const
{ return 0; } { return 0; }
base::~base() base::~base()
{ } { }
std::vector<std::string> base::generate(unsigned int label, array_expressions_container const & array_expressions, cl::Device const & device) std::vector<std::string> base::generate(unsigned int label, expressions_tuple const & expressions, cl::Device const & device)
{ {
array_expressions_container::data_type::const_iterator sit; expressions_tuple::data_type::const_iterator sit;
std::vector<mapping_type>::iterator mit; std::vector<mapping_type>::iterator mit;
if(int err = check_invalid(array_expressions, device)) if(int err = check_invalid(expressions, device))
throw operation_not_supported_exception("The supplied parameters for this template are invalid : err " + tools::to_string(err)); throw operation_not_supported_exception("The supplied parameters for this template are invalid : err " + tools::to_string(err));
//Create mapping //Create mapping
std::vector<mapping_type> mappings(array_expressions.data().size()); std::vector<mapping_type> mappings(expressions.data().size());
tools::shared_ptr<symbolic_binder> binder = make_binder(); tools::shared_ptr<symbolic_binder> binder = make_binder();
for (mit = mappings.begin(), sit = array_expressions.data().begin(); sit != array_expressions.data().end(); ++sit, ++mit) for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++sit, ++mit)
traverse(**sit, (*sit)->root(), map_functor(*binder,*mit), true); traverse(**sit, (*sit)->root(), map_functor(*binder,*mit), true);
return generate_impl(label, array_expressions, mappings); return generate_impl(label, expressions, mappings);
} }
template<class TType, class PType> template<class TType, class PType>
int base_impl<TType, PType>::check_invalid_impl(cl::Device const &, array_expressions_container const &) const int base_impl<TType, PType>::check_invalid_impl(cl::Device const &, expressions_tuple const &) const
{ return TEMPLATE_VALID; } { return TEMPLATE_VALID; }
template<class TType, class PType> template<class TType, class PType>
@@ -537,11 +537,11 @@ tools::shared_ptr<base> base_impl<TType, PType>::clone() const
{ return tools::shared_ptr<base>(new TType(*dynamic_cast<TType const *>(this))); } { return tools::shared_ptr<base>(new TType(*dynamic_cast<TType const *>(this))); }
template<class TType, class PType> template<class TType, class PType>
int base_impl<TType, PType>::check_invalid(array_expressions_container const & array_expressions, cl::Device const & device) const int base_impl<TType, PType>::check_invalid(expressions_tuple const & expressions, cl::Device const & device) const
{ {
//Query device informations //Query device informations
size_t lmem_available = device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>(); size_t lmem_available = device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
size_t lmem_used = lmem_usage(array_expressions); size_t lmem_used = lmem_usage(expressions);
if (lmem_used>lmem_available) if (lmem_used>lmem_available)
return TEMPLATE_LOCAL_MEMORY_OVERFLOW; return TEMPLATE_LOCAL_MEMORY_OVERFLOW;
@@ -575,7 +575,7 @@ int base_impl<TType, PType>::check_invalid(array_expressions_container const & a
p_.simd_width!=16) p_.simd_width!=16)
return TEMPLATE_INVALID_SIMD_WIDTH; return TEMPLATE_INVALID_SIMD_WIDTH;
return check_invalid_impl(device, array_expressions); return check_invalid_impl(device, expressions);
} }
template class base_impl<vaxpy, vaxpy_parameters>; template class base_impl<vaxpy, vaxpy_parameters>;

View File

@@ -14,7 +14,7 @@ maxpy_parameters::maxpy_parameters(unsigned int _simd_width,
int maxpy::check_invalid_impl(cl::Device const &, array_expressions_container const &) const int maxpy::check_invalid_impl(cl::Device const &, expressions_tuple const &) const
{ {
if (p_.simd_width>1) if (p_.simd_width>1)
return TEMPLATE_INVALID_SIMD_WIDTH; return TEMPLATE_INVALID_SIMD_WIDTH;
@@ -23,7 +23,7 @@ int maxpy::check_invalid_impl(cl::Device const &, array_expressions_container co
return TEMPLATE_VALID; return TEMPLATE_VALID;
} }
std::string maxpy::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const std::string maxpy::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const
{ {
kernel_generation_stream stream; kernel_generation_stream stream;
@@ -33,13 +33,13 @@ std::string maxpy::generate_impl(unsigned int label, array_expressions_container
fill_kernel_name(kprefix, label, "d"); fill_kernel_name(kprefix, label, "d");
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl;
stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, array_expressions) << ")" << std::endl; stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, expressions) << ")" << std::endl;
stream << "{" << std::endl; stream << "{" << std::endl;
stream.inc_tab(); stream.inc_tab();
process(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array0", "#scalartype #namereg = #pointer[#start];") process(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array0", "#scalartype #namereg = #pointer[#start];")
("array1", "#pointer += #start;") ("array1", "#pointer += #start;")
("array2", "#pointer = &$VALUE{#start1, #start2};"), array_expressions, mappings); ("array2", "#pointer = &$VALUE{#start1, #start2};"), expressions, mappings);
fetching_loop_info(p_.fetching_policy, "M", stream, init0, upper_bound0, inc0, "get_global_id(0)", "get_global_size(0)"); fetching_loop_info(p_.fetching_policy, "M", stream, init0, upper_bound0, inc0, "get_global_id(0)", "get_global_size(0)");
stream << "for(unsigned int i = " << init0 << "; i < " << upper_bound0 << "; i += " << inc0 << ")" << std::endl; stream << "for(unsigned int i = " << init0 << "; i < " << upper_bound0 << "; i += " << inc0 << ")" << std::endl;
@@ -55,7 +55,7 @@ std::string maxpy::generate_impl(unsigned int label, array_expressions_container
("vdiag", "#scalartype #namereg = ((i + ((#diag_offset<0)?#diag_offset:0))!=(j-((#diag_offset>0)?#diag_offset:0)))?0:$VALUE{min(i*#stride1, j*#stride1)};") ("vdiag", "#scalartype #namereg = ((i + ((#diag_offset<0)?#diag_offset:0))!=(j-((#diag_offset>0)?#diag_offset:0)))?0:$VALUE{min(i*#stride1, j*#stride1)};")
("repeat", "#scalartype #namereg = $VALUE{(i%#tuplearg0)*#stride1, (j%#tuplearg1)*#stride2};") ("repeat", "#scalartype #namereg = $VALUE{(i%#tuplearg0)*#stride1, (j%#tuplearg1)*#stride2};")
("outer", "#scalartype #namereg = ($LVALUE{i*#stride})*($RVALUE{j*#stride});") ("outer", "#scalartype #namereg = ($LVALUE{i*#stride})*($RVALUE{j*#stride});")
, array_expressions, mappings); , expressions, mappings);
evaluate(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> > evaluate(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >
("array2", "#namereg") ("array2", "#namereg")
@@ -64,10 +64,10 @@ std::string maxpy::generate_impl(unsigned int label, array_expressions_container
("array0", "#namereg") ("array0", "#namereg")
("outer", "#namereg") ("outer", "#namereg")
("cast", "convert_"+data_type) ("cast", "convert_"+data_type)
, array_expressions, mappings); , expressions, mappings);
process(stream, LHS_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array2", "$VALUE{i*#stride1,j*#stride2} = #namereg;") process(stream, LHS_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array2", "$VALUE{i*#stride1,j*#stride2} = #namereg;")
, array_expressions, mappings); , expressions, mappings);
stream.dec_tab(); stream.dec_tab();
stream << "}" << std::endl; stream << "}" << std::endl;
@@ -81,10 +81,10 @@ std::string maxpy::generate_impl(unsigned int label, array_expressions_container
return stream.str(); return stream.str();
} }
std::vector<std::string> maxpy::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const std::vector<std::string> maxpy::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const
{ {
std::vector<std::string> res; std::vector<std::string> res;
res.push_back(generate_impl(label, array_expressions, mappings, 1)); res.push_back(generate_impl(label, expressions, mappings, 1));
return res; return res;
} }
@@ -97,9 +97,9 @@ maxpy::maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2,
base_impl<maxpy, maxpy_parameters>(maxpy_parameters(simd, ls1, ls2, ng1, ng2, fetch), bind) base_impl<maxpy, maxpy_parameters>(maxpy_parameters(simd, ls1, ls2, ng1, ng2, fetch), bind)
{} {}
std::vector<int_t> maxpy::input_sizes(array_expressions_container const & array_expressions) std::vector<int_t> maxpy::input_sizes(expressions_tuple const & expressions)
{ {
atidlas::array_expression const & array_expression = *(array_expressions.data().front()); atidlas::array_expression const & array_expression = *(expressions.data().front());
std::pair<int_t, int_t> size = matrix_size(lhs_most(array_expression.tree(), array_expression.root())); std::pair<int_t, int_t> size = matrix_size(lhs_most(array_expression.tree(), array_expression.root()));
return tools::make_vector<int_t>() << size.first << size.second; return tools::make_vector<int_t>() << size.first << size.second;
} }
@@ -107,7 +107,7 @@ std::vector<int_t> maxpy::input_sizes(array_expressions_container const & array_
void maxpy::enqueue(cl::CommandQueue & queue, void maxpy::enqueue(cl::CommandQueue & queue,
std::vector<cl_ext::lazy_compiler> & programs, std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label, unsigned int label,
array_expressions_container const & array_expressions) expressions_tuple const & expressions)
{ {
char kname[10]; char kname[10];
fill_kernel_name(kname, label, "d"); fill_kernel_name(kname, label, "d");
@@ -116,10 +116,10 @@ void maxpy::enqueue(cl::CommandQueue & queue,
cl::NDRange grange(p_.local_size_0*p_.num_groups_0, p_.local_size_1*p_.num_groups_1); cl::NDRange grange(p_.local_size_0*p_.num_groups_0, p_.local_size_1*p_.num_groups_1);
cl::NDRange lrange(p_.local_size_0, p_.local_size_1); cl::NDRange lrange(p_.local_size_0, p_.local_size_1);
unsigned int current_arg = 0; unsigned int current_arg = 0;
std::vector<int_t> MN = input_sizes(array_expressions); std::vector<int_t> MN = input_sizes(expressions);
kernel.setArg(current_arg++, cl_uint(MN[0])); kernel.setArg(current_arg++, cl_uint(MN[0]));
kernel.setArg(current_arg++, cl_uint(MN[1])); kernel.setArg(current_arg++, cl_uint(MN[1]));
set_arguments(array_expressions, kernel, current_arg); set_arguments(expressions, kernel, current_arg);
queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange); queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange);
} }

View File

@@ -17,9 +17,9 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
mL(ms*local_size_0), nL(ns*local_size_1){} mL(ms*local_size_0), nL(ns*local_size_1){}
unsigned int mproduct::lmem_usage(array_expressions_container const & array_expressions) const unsigned int mproduct::lmem_usage(expressions_tuple const & expressions) const
{ {
atidlas::array_expression const & array_expression = (*array_expressions.data().front()); atidlas::array_expression const & array_expression = (*expressions.data().front());
numeric_type numeric_t = lhs_most(array_expression.tree(), array_expression.root()).lhs.dtype; numeric_type numeric_t = lhs_most(array_expression.tree(), array_expression.root()).lhs.dtype;
unsigned int N = 0; unsigned int N = 0;
@@ -30,16 +30,16 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
return N*size_of(numeric_t); return N*size_of(numeric_t);
} }
unsigned int mproduct::registers_usage(array_expressions_container const & array_expressions) const unsigned int mproduct::registers_usage(expressions_tuple const & expressions) const
{ {
atidlas::array_expression const & array_expression = (*array_expressions.data().front()); atidlas::array_expression const & array_expression = (*expressions.data().front());
numeric_type numeric_t = lhs_most(array_expression.tree(), array_expression.root()).lhs.dtype; numeric_type numeric_t = lhs_most(array_expression.tree(), array_expression.root()).lhs.dtype;
unsigned int N = p_.mS * p_.nS + p_.mS * p_.kS + p_.kS * p_.nS; unsigned int N = p_.mS * p_.nS + p_.mS * p_.kS + p_.kS * p_.nS;
return N*size_of(numeric_t); return N*size_of(numeric_t);
} }
int mproduct::check_invalid_impl(cl::Device const &, array_expressions_container const &) const int mproduct::check_invalid_impl(cl::Device const &, expressions_tuple const &) const
{ {
if (p_.A_fetching_policy!=FETCH_FROM_LOCAL && p_.B_fetching_policy!=FETCH_FROM_LOCAL&& (p_.local_fetch_0!=0 || p_.local_fetch_1!=0)) if (p_.A_fetching_policy!=FETCH_FROM_LOCAL && p_.B_fetching_policy!=FETCH_FROM_LOCAL&& (p_.local_fetch_0!=0 || p_.local_fetch_1!=0))
return TEMPLATE_GLOBAL_MEMORY_REQUIRES_ZERO_LOCAL_FETCH; return TEMPLATE_GLOBAL_MEMORY_REQUIRES_ZERO_LOCAL_FETCH;
@@ -87,7 +87,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
return TEMPLATE_VALID; return TEMPLATE_VALID;
} }
std::string mproduct::generate_impl(unsigned int label, const char * id, const array_expressions_container &array_expressions, const std::vector<mapping_type> &, bool fallback) const std::string mproduct::generate_impl(unsigned int label, const char * id, const expressions_tuple &expressions, const std::vector<mapping_type> &, bool fallback) const
{ {
using std::string; using std::string;
using tools::to_string; using tools::to_string;
@@ -106,7 +106,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
/// INIT /// INIT
/// ////////////// /// //////////////
kernel_generation_stream stream; kernel_generation_stream stream;
array_expression const & st = (*array_expressions.data().front()); array_expression const & st = (*expressions.data().front());
numeric_type dtype = lhs_most(st.tree(), st.root()).lhs.dtype; numeric_type dtype = lhs_most(st.tree(), st.root()).lhs.dtype;
std::string dtypestr = numeric_type_to_string(dtype); std::string dtypestr = numeric_type_to_string(dtype);
@@ -557,11 +557,11 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
#undef VST0RE #undef VST0RE
} }
std::vector<std::string> mproduct::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const std::vector<std::string> mproduct::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const
{ {
std::vector<std::string> res; std::vector<std::string> res;
res.push_back(generate_impl(label, "o", array_expressions, mappings, false)); res.push_back(generate_impl(label, "o", expressions, mappings, false));
res.push_back(generate_impl(label, "f", array_expressions, mappings, true)); res.push_back(generate_impl(label, "f", expressions, mappings, true));
return res; return res;
} }
@@ -615,10 +615,10 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
return result; return result;
} }
std::vector<int_t> mproduct::infos(array_expressions_container const & array_expressions, std::vector<int_t> mproduct::infos(expressions_tuple const & expressions,
lhs_rhs_element & C, lhs_rhs_element & A, lhs_rhs_element & B) lhs_rhs_element & C, lhs_rhs_element & A, lhs_rhs_element & B)
{ {
atidlas::array_expression const & array_expression = (*array_expressions.data().front()); atidlas::array_expression const & array_expression = (*expressions.data().front());
array_expression::container_type const & array = array_expression.tree(); array_expression::container_type const & array = array_expression.tree();
std::size_t root = array_expression.root(); std::size_t root = array_expression.root();
@@ -640,18 +640,18 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
mproduct::mproduct(mproduct_parameters const & parameters, char A_trans, char B_trans) : base_impl<mproduct, mproduct_parameters>(parameters, BIND_ALL_UNIQUE), A_trans_(A_trans), B_trans_(B_trans) mproduct::mproduct(mproduct_parameters const & parameters, char A_trans, char B_trans) : base_impl<mproduct, mproduct_parameters>(parameters, BIND_ALL_UNIQUE), A_trans_(A_trans), B_trans_(B_trans)
{ } { }
std::vector<int_t> mproduct::input_sizes(array_expressions_container const & array_expressions) std::vector<int_t> mproduct::input_sizes(expressions_tuple const & expressions)
{ {
lhs_rhs_element d0, d1, d2; lhs_rhs_element d0, d1, d2;
return infos(array_expressions, d0, d1, d2); return infos(expressions, d0, d1, d2);
} }
void mproduct::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, array_expressions_container const & array_expressions) void mproduct::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, expressions_tuple const & expressions)
{ {
using namespace tools; using namespace tools;
lhs_rhs_element C, A, B; lhs_rhs_element C, A, B;
std::vector<int_t> MNK = infos(array_expressions, C, A, B); std::vector<int_t> MNK = infos(expressions, C, A, B);
int_t M = MNK[0]; int_t M = MNK[0];
int_t N = MNK[1]; int_t N = MNK[1];

View File

@@ -14,7 +14,7 @@ mreduction_parameters::mreduction_parameters(unsigned int _simd_width,
num_groups_0(_num_groups_0), fetch_policy(_fetch_policy) { } num_groups_0(_num_groups_0), fetch_policy(_fetch_policy) { }
int mreduction::check_invalid_impl(cl::Device const &, array_expressions_container const &) const int mreduction::check_invalid_impl(cl::Device const &, expressions_tuple const &) const
{ {
if (p_.fetch_policy==FETCH_FROM_LOCAL) if (p_.fetch_policy==FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE; return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
@@ -26,7 +26,7 @@ unsigned int mreduction::lmem_usage() const
return p_.local_size_0*(p_.local_size_1+1); return p_.local_size_0*(p_.local_size_1+1);
} }
std::string mreduction::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width, std::vector<mapped_mreduction*> const & exprs) const std::string mreduction::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width, std::vector<mapped_mreduction*> const & exprs) const
{ {
using tools::to_string; using tools::to_string;
@@ -40,7 +40,7 @@ std::string mreduction::generate_impl(unsigned int label, array_expressions_cont
fill_kernel_name(kprefix, label, "d"); fill_kernel_name(kprefix, label, "d");
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl;
stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, array_expressions) << ")" << std::endl; stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, expressions) << ")" << std::endl;
stream << "{" << std::endl; stream << "{" << std::endl;
stream.inc_tab(); stream.inc_tab();
@@ -48,7 +48,7 @@ std::string mreduction::generate_impl(unsigned int label, array_expressions_cont
tools::make_map<std::map<std::string, std::string> >("array0", "#scalartype #namereg = #pointer[#start];") tools::make_map<std::map<std::string, std::string> >("array0", "#scalartype #namereg = #pointer[#start];")
("array1", "#pointer += #start;") ("array1", "#pointer += #start;")
("array2", "#pointer += #start1 + #start2*#ld; " ("array2", "#pointer += #start1 + #start2*#ld; "
"#ld *= #nldstride; "), array_expressions, mappings); "#ld *= #nldstride; "), expressions, mappings);
for (std::vector<mapped_mreduction*>::const_iterator it = exprs.begin(); it != exprs.end(); ++it) for (std::vector<mapped_mreduction*>::const_iterator it = exprs.begin(); it != exprs.end(); ++it)
stream << (*it)->process("__local #scalartype #name_buf[" + to_string(lsize0*lsize1) + "];") << std::endl; stream << (*it)->process("__local #scalartype #name_buf[" + to_string(lsize0*lsize1) + "];") << std::endl;
@@ -160,7 +160,7 @@ std::string mreduction::generate_impl(unsigned int label, array_expressions_cont
std::map<std::string, std::string> accessors; std::map<std::string, std::string> accessors;
accessors["mreduction"] = "#name_buf[lid0*" + lsize1str + "]"; accessors["mreduction"] = "#name_buf[lid0*" + lsize1str + "]";
accessors["array1"] = "#pointer[r*#stride]"; accessors["array1"] = "#pointer[r*#stride]";
evaluate(stream, PARENT_NODE_TYPE, accessors, array_expressions, mappings); evaluate(stream, PARENT_NODE_TYPE, accessors, expressions, mappings);
stream.dec_tab(); stream.dec_tab();
stream << "}" << std::endl; stream << "}" << std::endl;
@@ -174,14 +174,14 @@ std::string mreduction::generate_impl(unsigned int label, array_expressions_cont
return stream.str(); return stream.str();
} }
std::vector<std::string> mreduction::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const std::vector<std::string> mreduction::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const
{ {
std::vector<mapped_mreduction*> exprs; std::vector<mapped_mreduction*> exprs;
array_expressions_container::data_type::const_iterator sit; expressions_tuple::data_type::const_iterator sit;
std::vector<mapping_type>::const_iterator mit; std::vector<mapping_type>::const_iterator mit;
for (mit = mappings.begin(), sit = array_expressions.data().begin(); mit != mappings.end(); ++mit, ++sit) for (mit = mappings.begin(), sit = expressions.data().begin(); mit != mappings.end(); ++mit, ++sit)
{ {
array_expression const & first_expression = *array_expressions.data().front(); array_expression const & first_expression = *expressions.data().front();
std::vector<size_t> idx = filter_nodes(&is_reduction, first_expression, false); std::vector<size_t> idx = filter_nodes(&is_reduction, first_expression, false);
for (unsigned int j = 0; j < idx.size(); ++j) for (unsigned int j = 0; j < idx.size(); ++j)
exprs.push_back((mapped_mreduction*)(mit->at(mapping_key(idx[j], PARENT_NODE_TYPE)).get())); exprs.push_back((mapped_mreduction*)(mit->at(mapping_key(idx[j], PARENT_NODE_TYPE)).get()));
@@ -190,11 +190,11 @@ std::vector<std::string> mreduction::generate_impl(unsigned int label, array_exp
std::vector<std::string> res; std::vector<std::string> res;
if (reduction_type_ && p_.simd_width>1) if (reduction_type_ && p_.simd_width>1)
{ {
res.push_back(generate_impl(label, array_expressions, mappings, p_.simd_width, exprs)); res.push_back(generate_impl(label, expressions, mappings, p_.simd_width, exprs));
res.push_back(generate_impl(label, array_expressions, mappings, 1, exprs)); res.push_back(generate_impl(label, expressions, mappings, 1, exprs));
} }
else else
res.push_back(generate_impl(label, array_expressions, mappings, 1, exprs)); res.push_back(generate_impl(label, expressions, mappings, 1, exprs));
return res; return res;
} }
@@ -204,9 +204,9 @@ mreduction::mreduction(mreduction::parameters_type const & parameters,
base_impl<mreduction, mreduction_parameters>(parameters, binding_policy), base_impl<mreduction, mreduction_parameters>(parameters, binding_policy),
reduction_type_(rtype){ } reduction_type_(rtype){ }
std::vector<int_t> mreduction::input_sizes(array_expressions_container const & array_expressions) std::vector<int_t> mreduction::input_sizes(expressions_tuple const & expressions)
{ {
array_expression const & first_expression = *array_expressions.data().front(); array_expression const & first_expression = *expressions.data().front();
std::vector<std::size_t> idx = filter_nodes(&is_reduction, first_expression, false); std::vector<std::size_t> idx = filter_nodes(&is_reduction, first_expression, false);
std::pair<int_t, int_t> MN = matrix_size(lhs_most(first_expression.tree(), idx[0])); std::pair<int_t, int_t> MN = matrix_size(lhs_most(first_expression.tree(), idx[0]));
if(reduction_type_==REDUCE_COLUMNS) if(reduction_type_==REDUCE_COLUMNS)
@@ -217,15 +217,15 @@ std::vector<int_t> mreduction::input_sizes(array_expressions_container const & a
void mreduction::enqueue(cl::CommandQueue & queue, void mreduction::enqueue(cl::CommandQueue & queue,
std::vector<cl_ext::lazy_compiler> & programs, std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label, unsigned int label,
array_expressions_container const & array_expressions) expressions_tuple const & expressions)
{ {
char kname[10]; char kname[10];
fill_kernel_name(kname, label, "d"); fill_kernel_name(kname, label, "d");
std::vector<int_t> MN = input_sizes(array_expressions); std::vector<int_t> MN = input_sizes(expressions);
//Kernel //Kernel
int idx = 0; int idx = 0;
if(reduction_type_==REDUCE_COLUMNS && p_.simd_width>1 && requires_fallback(array_expressions)) if(reduction_type_==REDUCE_COLUMNS && p_.simd_width>1 && requires_fallback(expressions))
idx = 1; idx = 1;
cl::Program & program = programs[idx].program(); cl::Program & program = programs[idx].program();
cl::Kernel kernel(program, kname); cl::Kernel kernel(program, kname);
@@ -237,7 +237,7 @@ void mreduction::enqueue(cl::CommandQueue & queue,
unsigned int current_arg = 0; unsigned int current_arg = 0;
kernel.setArg(current_arg++, cl_uint(MN[0])); kernel.setArg(current_arg++, cl_uint(MN[0]));
kernel.setArg(current_arg++, cl_uint(MN[1])); kernel.setArg(current_arg++, cl_uint(MN[1]));
set_arguments(array_expressions, kernel, current_arg); set_arguments(expressions, kernel, current_arg);
queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange); queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange);
} }

View File

@@ -13,10 +13,10 @@ reduction_parameters::reduction_parameters(unsigned int _simd_width,
fetching_policy_type _fetching_policy) : base::parameters_type(_simd_width, _group_size, 1, 2), num_groups(_num_groups), fetching_policy(_fetching_policy) fetching_policy_type _fetching_policy) : base::parameters_type(_simd_width, _group_size, 1, 2), num_groups(_num_groups), fetching_policy(_fetching_policy)
{ } { }
unsigned int reduction::lmem_usage(array_expressions_container const & array_expressions) const unsigned int reduction::lmem_usage(expressions_tuple const & expressions) const
{ {
unsigned int res = 0; unsigned int res = 0;
for(array_expressions_container::data_type::const_iterator it = array_expressions.data().begin() ; it != array_expressions.data().end() ; ++it) for(expressions_tuple::data_type::const_iterator it = expressions.data().begin() ; it != expressions.data().end() ; ++it)
{ {
numeric_type numeric_t= lhs_most((*it)->tree(), (*it)->root()).lhs.dtype; numeric_type numeric_t= lhs_most((*it)->tree(), (*it)->root()).lhs.dtype;
res += p_.local_size_0*size_of(numeric_t); res += p_.local_size_0*size_of(numeric_t);
@@ -24,7 +24,7 @@ unsigned int reduction::lmem_usage(array_expressions_container const & array_exp
return res; return res;
} }
int reduction::check_invalid_impl(cl::Device const &, array_expressions_container const &) const int reduction::check_invalid_impl(cl::Device const &, expressions_tuple const &) const
{ {
if (p_.fetching_policy==FETCH_FROM_LOCAL) if (p_.fetching_policy==FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE; return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
@@ -56,7 +56,7 @@ inline void reduction::reduce_1d_local_memory(kernel_generation_stream & stream,
stream << "}" << std::endl; stream << "}" << std::endl;
} }
std::string reduction::generate_impl(unsigned int label, const char * type, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const std::string reduction::generate_impl(unsigned int label, const char * type, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const
{ {
kernel_generation_stream stream; kernel_generation_stream stream;
@@ -89,13 +89,13 @@ std::string reduction::generate_impl(unsigned int label, const char * type, arra
fill_kernel_name(kprefix, label, type); fill_kernel_name(kprefix, label, type);
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl;
stream << "__kernel void " << kprefix << "0" << "(" << arguments << generate_arguments("#scalartype", mappings, array_expressions) << ")" << std::endl; stream << "__kernel void " << kprefix << "0" << "(" << arguments << generate_arguments("#scalartype", mappings, expressions) << ")" << std::endl;
stream << "{" << std::endl; stream << "{" << std::endl;
stream.inc_tab(); stream.inc_tab();
stream << "unsigned int lid = get_local_id(0);" << std::endl; stream << "unsigned int lid = get_local_id(0);" << std::endl;
process(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array0", "#scalartype #namereg = #pointer[#start];") process(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array0", "#scalartype #namereg = #pointer[#start];")
("array1", "#pointer += #start;"), array_expressions, mappings); ("array1", "#pointer += #start;"), expressions, mappings);
for (unsigned int k = 0; k < N; ++k) for (unsigned int k = 0; k < N; ++k)
{ {
@@ -194,7 +194,7 @@ std::string reduction::generate_impl(unsigned int label, const char * type, arra
* Second kernel * Second kernel
* -----------------------*/ * -----------------------*/
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl;
stream << "__kernel void " << kprefix << "1" << "(" << arguments << generate_arguments("#scalartype", mappings, array_expressions) << ")" << std::endl; stream << "__kernel void " << kprefix << "1" << "(" << arguments << generate_arguments("#scalartype", mappings, expressions) << ")" << std::endl;
stream << "{" << std::endl; stream << "{" << std::endl;
stream.inc_tab(); stream.inc_tab();
@@ -246,7 +246,7 @@ std::string reduction::generate_impl(unsigned int label, const char * type, arra
std::map<std::string, std::string> accessors; std::map<std::string, std::string> accessors;
accessors["scalar_reduction"] = "#name_buf[0]"; accessors["scalar_reduction"] = "#name_buf[0]";
accessors["array0"] = "#pointer[#start]"; accessors["array0"] = "#pointer[#start]";
evaluate(stream, PARENT_NODE_TYPE, accessors, array_expressions, mappings); evaluate(stream, PARENT_NODE_TYPE, accessors, expressions, mappings);
stream.dec_tab(); stream.dec_tab();
stream << "}" << std::endl; stream << "}" << std::endl;
@@ -256,11 +256,11 @@ std::string reduction::generate_impl(unsigned int label, const char * type, arra
return stream.str(); return stream.str();
} }
std::vector<std::string> reduction::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const std::vector<std::string> reduction::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const
{ {
std::vector<std::string> result; std::vector<std::string> result;
result.push_back(generate_impl(label, "f", array_expressions, mappings, 1)); result.push_back(generate_impl(label, "f", expressions, mappings, 1));
result.push_back(generate_impl(label, "o", array_expressions, mappings, p_.simd_width)); result.push_back(generate_impl(label, "o", expressions, mappings, p_.simd_width));
return result; return result;
} }
@@ -273,22 +273,22 @@ reduction::reduction(unsigned int simd, unsigned int ls, unsigned int ng,
base_impl<reduction, reduction_parameters>(reduction_parameters(simd,ls,ng,fetch), bind) base_impl<reduction, reduction_parameters>(reduction_parameters(simd,ls,ng,fetch), bind)
{} {}
std::vector<int_t> reduction::input_sizes(array_expressions_container const & array_expressions) std::vector<int_t> reduction::input_sizes(expressions_tuple const & expressions)
{ {
std::vector<size_t> reductions_idx = filter_nodes(&is_reduction, *(array_expressions.data().front()), false); std::vector<size_t> reductions_idx = filter_nodes(&is_reduction, *(expressions.data().front()), false);
int_t N = vector_size(lhs_most(array_expressions.data().front()->tree(), reductions_idx[0])); int_t N = vector_size(lhs_most(expressions.data().front()->tree(), reductions_idx[0]));
return tools::make_vector<int_t>() << N; return tools::make_vector<int_t>() << N;
} }
void reduction::enqueue(cl::CommandQueue & queue, void reduction::enqueue(cl::CommandQueue & queue,
std::vector<cl_ext::lazy_compiler> & programs, std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label, unsigned int label,
array_expressions_container const & array_expressions) expressions_tuple const & expressions)
{ {
//Preprocessing //Preprocessing
int_t size = input_sizes(array_expressions)[0]; int_t size = input_sizes(expressions)[0];
std::vector<array_expression::node const *> reductions; std::vector<array_expression::node const *> reductions;
for (array_expressions_container::data_type::const_iterator it = array_expressions.data().begin(); it != array_expressions.data().end(); ++it) for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it)
{ {
std::vector<size_t> reductions_idx = filter_nodes(&is_reduction, **it, false); std::vector<size_t> reductions_idx = filter_nodes(&is_reduction, **it, false);
for (std::vector<size_t>::iterator itt = reductions_idx.begin(); itt != reductions_idx.end(); ++itt) for (std::vector<size_t>::iterator itt = reductions_idx.begin(); itt != reductions_idx.end(); ++itt)
@@ -303,7 +303,7 @@ void reduction::enqueue(cl::CommandQueue & queue,
fill_kernel_name(kopt[0], label, "o0"); fill_kernel_name(kopt[0], label, "o0");
fill_kernel_name(kopt[1], label, "o1"); fill_kernel_name(kopt[1], label, "o1");
bool fallback = p_.simd_width > 1 && (requires_fallback(array_expressions) || (size%p_.simd_width>0)); bool fallback = p_.simd_width > 1 && (requires_fallback(expressions) || (size%p_.simd_width>0));
cl::Program & program = programs[fallback?0:1].program(); cl::Program & program = programs[fallback?0:1].program();
cl::Kernel kernels[2] = { cl::Kernel(program, fallback?kfallback[0]:kopt[0]), cl::Kernel kernels[2] = { cl::Kernel(program, fallback?kfallback[0]:kopt[0]),
cl::Kernel(program, fallback?kfallback[1]:kopt[1]) }; cl::Kernel(program, fallback?kfallback[1]:kopt[1]) };
@@ -313,8 +313,8 @@ void reduction::enqueue(cl::CommandQueue & queue,
cl::NDRange lrange[2] = { cl::NDRange(p_.local_size_0), cl::NDRange(p_.local_size_0) }; cl::NDRange lrange[2] = { cl::NDRange(p_.local_size_0), cl::NDRange(p_.local_size_0) };
//Arguments //Arguments
cl::Context context = array_expressions.context(); cl::Context context = expressions.context();
array_expression const & s = *(array_expressions.data().front()); array_expression const & s = *(expressions.data().front());
unsigned int dtype_size = size_of(lhs_most(s.tree(), s.root()).lhs.dtype); unsigned int dtype_size = size_of(lhs_most(s.tree(), s.root()).lhs.dtype);
for (unsigned int k = 0; k < 2; k++) for (unsigned int k = 0; k < 2; k++)
{ {
@@ -338,7 +338,7 @@ void reduction::enqueue(cl::CommandQueue & queue,
kernels[k].setArg(n_arg++, tmp_[i]); kernels[k].setArg(n_arg++, tmp_[i]);
i++; i++;
} }
set_arguments(array_expressions, kernels[k], n_arg); set_arguments(expressions, kernels[k], n_arg);
} }
for (unsigned int k = 0; k < 2; k++) for (unsigned int k = 0; k < 2; k++)

View File

@@ -16,14 +16,14 @@ vaxpy_parameters::vaxpy_parameters(unsigned int _simd_width,
{ } { }
int vaxpy::check_invalid_impl(cl::Device const &, array_expressions_container const &) const int vaxpy::check_invalid_impl(cl::Device const &, expressions_tuple const &) const
{ {
if (p_.fetching_policy==FETCH_FROM_LOCAL) if (p_.fetching_policy==FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE; return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
return TEMPLATE_VALID; return TEMPLATE_VALID;
} }
std::vector<std::string> vaxpy::generate_impl(unsigned int label, array_expressions_container const & array_expressions, std::vector<mapping_type> const & mappings) const std::vector<std::string> vaxpy::generate_impl(unsigned int label, expressions_tuple const & expressions, std::vector<mapping_type> const & mappings) const
{ {
std::vector<std::string> result; std::vector<std::string> result;
for (unsigned int i = 0; i < 2; ++i) for (unsigned int i = 0; i < 2; ++i)
@@ -36,14 +36,14 @@ std::vector<std::string> vaxpy::generate_impl(unsigned int label, array_expressi
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl;
char kprefix[10]; char kprefix[10];
fill_kernel_name(kprefix, label, (i==0?"f":"o")); fill_kernel_name(kprefix, label, (i==0?"f":"o"));
stream << "__kernel void " << kprefix << "(unsigned int N," << generate_arguments(data_type, mappings, array_expressions) << ")" << std::endl; stream << "__kernel void " << kprefix << "(unsigned int N," << generate_arguments(data_type, mappings, expressions) << ")" << std::endl;
stream << "{" << std::endl; stream << "{" << std::endl;
stream.inc_tab(); stream.inc_tab();
process(stream, PARENT_NODE_TYPE, process(stream, PARENT_NODE_TYPE,
tools::make_map<std::map<std::string, std::string> >("array0", "#scalartype #namereg = #pointer[#start];") tools::make_map<std::map<std::string, std::string> >("array0", "#scalartype #namereg = #pointer[#start];")
("array1", "#pointer += #start;") ("array1", "#pointer += #start;")
("array1", "#start1/=" + str_simd_width + ";"), array_expressions, mappings); ("array1", "#start1/=" + str_simd_width + ";"), expressions, mappings);
std::string init, upper_bound, inc; std::string init, upper_bound, inc;
fetching_loop_info(p_.fetching_policy, "N/"+str_simd_width, stream, init, upper_bound, inc, "get_global_id(0)", "get_global_size(0)"); fetching_loop_info(p_.fetching_policy, "N/"+str_simd_width, stream, init, upper_bound, inc, "get_global_id(0)", "get_global_size(0)");
@@ -55,7 +55,7 @@ std::vector<std::string> vaxpy::generate_impl(unsigned int label, array_expressi
("matrix_row", "#scalartype #namereg = $VALUE{#row*#stride1, i*#stride2};") ("matrix_row", "#scalartype #namereg = $VALUE{#row*#stride1, i*#stride2};")
("matrix_column", "#scalartype #namereg = $VALUE{i*#stride1,#column*#stride2};") ("matrix_column", "#scalartype #namereg = $VALUE{i*#stride1,#column*#stride2};")
("matrix_diag", "#scalartype #namereg = #pointer[#diag_offset<0?$OFFSET{(i - #diag_offset)*#stride1, i*#stride2}:$OFFSET{i*#stride1, (i + #diag_offset)*#stride2}];") ("matrix_diag", "#scalartype #namereg = #pointer[#diag_offset<0?$OFFSET{(i - #diag_offset)*#stride1, i*#stride2}:$OFFSET{i*#stride1, (i + #diag_offset)*#stride2}];")
, array_expressions, mappings); , expressions, mappings);
evaluate(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array1", "#namereg") evaluate(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array1", "#namereg")
("matrix_row", "#namereg") ("matrix_row", "#namereg")
@@ -63,13 +63,13 @@ std::vector<std::string> vaxpy::generate_impl(unsigned int label, array_expressi
("matrix_diag", "#namereg") ("matrix_diag", "#namereg")
("array0", "#namereg") ("array0", "#namereg")
("cast", "convert_"+data_type) ("cast", "convert_"+data_type)
, array_expressions, mappings); , expressions, mappings);
process(stream, LHS_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array1", "#pointer[i*#stride] = #namereg;") process(stream, LHS_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array1", "#pointer[i*#stride] = #namereg;")
("matrix_row", "$VALUE{#row, i} = #namereg;") ("matrix_row", "$VALUE{#row, i} = #namereg;")
("matrix_column", "$VALUE{i, #column} = #namereg;") ("matrix_column", "$VALUE{i, #column} = #namereg;")
("matrix_diag", "#diag_offset<0?$VALUE{(i - #diag_offset)*#stride1, i*#stride2}:$VALUE{i*#stride1, (i + #diag_offset)*#stride2} = #namereg;") ("matrix_diag", "#diag_offset<0?$VALUE{(i - #diag_offset)*#stride1, i*#stride2}:$VALUE{i*#stride1, (i + #diag_offset)*#stride2} = #namereg;")
,array_expressions, mappings); ,expressions, mappings);
stream.dec_tab(); stream.dec_tab();
stream << "}" << std::endl; stream << "}" << std::endl;
@@ -77,7 +77,7 @@ std::vector<std::string> vaxpy::generate_impl(unsigned int label, array_expressi
stream << "if(get_global_id(0)==0)" << std::endl; stream << "if(get_global_id(0)==0)" << std::endl;
stream << "{" << std::endl; stream << "{" << std::endl;
stream.inc_tab(); stream.inc_tab();
process(stream, LHS_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array0", "#pointer[#start] = #namereg;"), array_expressions, mappings); process(stream, LHS_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array0", "#pointer[#start] = #namereg;"), expressions, mappings);
stream.dec_tab(); stream.dec_tab();
stream << "}" << std::endl; stream << "}" << std::endl;
@@ -102,25 +102,25 @@ vaxpy::vaxpy(unsigned int simd, unsigned int ls, unsigned int ng,
{} {}
std::vector<int_t> vaxpy::input_sizes(array_expressions_container const & array_expressions) std::vector<int_t> vaxpy::input_sizes(expressions_tuple const & expressions)
{ {
int_t size = static_cast<array_expression const *>(array_expressions.data().front().get())->shape()._1; int_t size = static_cast<array_expression const *>(expressions.data().front().get())->shape()._1;
return tools::make_vector<int_t>() << size; return tools::make_vector<int_t>() << size;
} }
void vaxpy::enqueue(cl::CommandQueue & queue, void vaxpy::enqueue(cl::CommandQueue & queue,
std::vector<cl_ext::lazy_compiler> & programs, std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label, unsigned int label,
array_expressions_container const & array_expressions) expressions_tuple const & expressions)
{ {
//Size //Size
int_t size = input_sizes(array_expressions)[0]; int_t size = input_sizes(expressions)[0];
//Kernel //Kernel
char kfb[10]; char kfb[10];
char kopt[10]; char kopt[10];
fill_kernel_name(kfb, label, "f"); fill_kernel_name(kfb, label, "f");
fill_kernel_name(kopt, label, "o"); fill_kernel_name(kopt, label, "o");
bool fallback = p_.simd_width > 1 && (requires_fallback(array_expressions) || (size%p_.simd_width>0)); bool fallback = p_.simd_width > 1 && (requires_fallback(expressions) || (size%p_.simd_width>0));
cl::Program const & program = programs[fallback?0:1].program(); cl::Program const & program = programs[fallback?0:1].program();
cl_ext::kernels_t::key_type key(program(), label); cl_ext::kernels_t::key_type key(program(), label);
@@ -135,7 +135,7 @@ void vaxpy::enqueue(cl::CommandQueue & queue,
//Arguments //Arguments
unsigned int current_arg = 0; unsigned int current_arg = 0;
kernel.setArg(current_arg++, cl_uint(size)); kernel.setArg(current_arg++, cl_uint(size));
set_arguments(array_expressions, kernel, current_arg); set_arguments(expressions, kernel, current_arg);
queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange); queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange);
queue.flush(); queue.flush();
} }

View File

@@ -28,9 +28,9 @@ std::string model::define_extension(std::string const & extensions, std::string
return std::string(""); return std::string("");
} }
void model::fill_program_name(char* program_name, array_expressions_container const & array_expressions, binding_policy_t binding_policy) void model::fill_program_name(char* program_name, expressions_tuple const & expressions, binding_policy_t binding_policy)
{ {
if (array_expressions.order()==array_expressions_container::INDEPENDENT) if (expressions.order()==expressions_tuple::INDEPENDENT)
*program_name++='i'; *program_name++='i';
else else
*program_name++='s'; *program_name++='s';
@@ -39,31 +39,39 @@ void model::fill_program_name(char* program_name, array_expressions_container co
binder = new bind_to_handle(); binder = new bind_to_handle();
else else
binder = new bind_all_unique(); binder = new bind_all_unique();
for (array_expressions_container::data_type::const_iterator it = array_expressions.data().begin(); it != array_expressions.data().end(); ++it) for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it)
traverse(**it, (*it)->root(), array_expression_representation_functor(*binder, program_name),true); traverse(**it, (*it)->root(), array_expression_representation_functor(*binder, program_name),true);
*program_name='\0'; *program_name='\0';
delete binder; delete binder;
} }
std::vector<cl_ext::lazy_compiler>& model::init(array_expressions_container const & array_expressions, cl::Context const & context, cl::Device const & device, bool force_recompilation) std::vector<cl_ext::lazy_compiler>& model::init(expressions_tuple const & expressions, runtime_options const & opt)
{ {
char program_name[256]; cl::Context const & context = expressions.context();
fill_program_name(program_name, array_expressions, BIND_TO_HANDLE); std::string pname;
std::string pname(program_name); if(opt.program_name.empty())
{
char program_name[256];
fill_program_name(program_name, expressions, BIND_TO_HANDLE);
pname = std::string(program_name);
}
else
pname = opt.program_name;
std::vector<cl_ext::lazy_compiler> & to_init = lazy_programs_[context()][pname]; std::vector<cl_ext::lazy_compiler> & to_init = lazy_programs_[context()][pname];
if(to_init.empty()) if(to_init.empty())
{ {
cl::Device device = queue_.getInfo<CL_QUEUE_DEVICE>();
std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>(); std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>();
to_init.push_back(cl_ext::lazy_compiler(context, pname, force_recompilation)); to_init.push_back(cl_ext::lazy_compiler(context, pname, opt.recompile));
to_init.back().add(define_extension(extensions, "cl_khr_fp64")); to_init.back().add(define_extension(extensions, "cl_khr_fp64"));
to_init.push_back(cl_ext::lazy_compiler(context, pname + "_fb", force_recompilation)); to_init.push_back(cl_ext::lazy_compiler(context, pname + "_fb", opt.recompile));
to_init.back().add(define_extension(extensions, "cl_khr_fp64")); to_init.back().add(define_extension(extensions, "cl_khr_fp64"));
for(size_t i = 0 ; i < templates_.size() ; ++i) for(size_t i = 0 ; i < templates_.size() ; ++i)
{ {
std::vector<std::string> cur = templates_[i]->generate(i, array_expressions, device); std::vector<std::string> cur = templates_[i]->generate(i, expressions, device);
for(size_t j = 0 ; j < cur.size() ; ++j){ for(size_t j = 0 ; j < cur.size() ; ++j){
to_init[j].add(cur[j]); to_init[j].add(cur[j]);
} }
@@ -82,42 +90,37 @@ model::model(std::vector< tools::shared_ptr<base> > const & templates, cl::Comma
/**
 * Builds a model made of a single template.
 *
 * @param tp     template to use; a clone of it (tp.clone()) is stored
 * @param queue  command queue the model is bound to
 */
model::model(base const & tp, cl::CommandQueue & queue)
  : templates_(1, tp.clone()),
    queue_(queue)
{ }
void model::execute(array_expressions_container const & array_expressions, bool bypass_predictor, bool force_recompilation) void model::execute(expressions_tuple const & expressions, runtime_options const & opt)
{ {
bypass_predictor = bypass_predictor || predictor_.get()==NULL; std::vector<cl_ext::lazy_compiler> & compilers = init(expressions, opt);
cl::Context const & context = array_expressions.context();
assert(context() == queue_.getInfo<CL_QUEUE_CONTEXT>()());
cl::Device const & device = queue_.getInfo<CL_QUEUE_DEVICE>();
std::vector<cl_ext::lazy_compiler> & compilers = init(array_expressions, context, device, force_recompilation);
//Prediction //Prediction
std::vector<int_t> x = templates_[0]->input_sizes(array_expressions); int label = 0;
int label; if(opt.label>=0)
//The user tuned the model specifically for this input size {
if(hardcoded_.find(x)!=hardcoded_.end()) label = opt.label;
label = hardcoded_.at(x); }
//The user bypasses the random forest
else if(bypass_predictor)
label = 0;
//Default
else else
{ {
std::vector<float> predictions = predictor_->predict(x); std::vector<int_t> x = templates_[0]->input_sizes(expressions);
label = std::distance(predictions.begin(),std::min_element(predictions.begin(), predictions.end())); //The user tuned the model specifically for this input size
if(hardcoded_.find(x)!=hardcoded_.end())
label = hardcoded_.at(x);
//The user bypasses the random forest
else if(predictor_.get())
{
std::vector<float> predictions = predictor_->predict(x);
label = std::distance(predictions.begin(),std::min_element(predictions.begin(), predictions.end()));
}
} }
//Execution //Execution
templates_[label]->enqueue(queue_, compilers, label, array_expressions); templates_[label]->enqueue(queue_, compilers, label, expressions);
} }
void model::tune(array_expressions_container const & array_expressions) void model::tune(expressions_tuple const & expressions)
{ {
cl::Context const & context = array_expressions.context(); std::vector<cl_ext::lazy_compiler> & compilers = init(expressions);
assert(context() == queue_.getInfo<CL_QUEUE_CONTEXT>()());
cl::Device device = queue_.getInfo<CL_QUEUE_DEVICE>();
std::vector<cl_ext::lazy_compiler> & compilers = init(array_expressions, context, device, false);
//Collect the timings //Collect the timings
std::vector<float> timings(templates_.size()); std::vector<float> timings(templates_.size());
@@ -125,13 +128,13 @@ void model::tune(array_expressions_container const & array_expressions)
for(size_t i = 0 ; i < templates_.size() ; ++i) for(size_t i = 0 ; i < templates_.size() ; ++i)
{ {
timer.start(); timer.start();
templates_[i]->enqueue(queue_, compilers, i, array_expressions); templates_[i]->enqueue(queue_, compilers, i, expressions);
queue_.finish(); queue_.finish();
timings[i] = timer.get(); timings[i] = timer.get();
} }
//Fill the override //Fill the override
std::vector<int_t> x = templates_[0]->input_sizes(array_expressions); std::vector<int_t> x = templates_[0]->input_sizes(expressions);
hardcoded_[x] = std::distance(timings.begin(),std::min_element(timings.begin(), timings.end())); hardcoded_[x] = std::distance(timings.begin(),std::min_element(timings.begin(), timings.end()));
} }

View File

@@ -32,19 +32,19 @@ random_forest::random_forest(rapidjson::Value const & estimators)
{ {
for(rapidjson::SizeType i = 0 ; i < estimators.Size() ; ++i) for(rapidjson::SizeType i = 0 ; i < estimators.Size() ; ++i)
estimators_.push_back(tree(estimators[i])); estimators_.push_back(tree(estimators[i]));
D_ = estimators_.front().D();
} }
std::vector<float> random_forest::predict(std::vector<int_t> const & x) const std::vector<float> random_forest::predict(std::vector<int_t> const & x) const
{ {
int_t D = estimators_.front().D(); std::vector<float> res(D_, 0);
std::vector<float> res(D, 0);
for(std::vector<tree>::const_iterator it = estimators_.begin() ; it != estimators_.end() ; ++it) for(std::vector<tree>::const_iterator it = estimators_.begin() ; it != estimators_.end() ; ++it)
{ {
std::vector<float> const & subres = it->predict(x); std::vector<float> const & subres = it->predict(x);
for(int_t i = 0 ; i < D ; ++i) for(int_t i = 0 ; i < D_ ; ++i)
res[i] += subres[i]; res[i] += subres[i];
} }
for(int_t i = 0 ; i < D ; ++i) for(int_t i = 0 ; i < D_ ; ++i)
res[i] /= estimators_.size(); res[i] /= estimators_.size();
return res; return res;
} }

View File

@@ -176,32 +176,32 @@ array_expression array_expression::operator!()
// //
/**
 * Makes a heap-allocated copy of an expression and hands it to a shared pointer.
 *
 * @param s  expression to copy
 * @return a shared pointer holding the new copy
 */
tools::shared_ptr<array_expression> expressions_tuple::create(array_expression const & s)
{
  array_expression * copy = new array_expression(s);
  return tools::shared_ptr<array_expression>(copy);
}
array_expressions_container::array_expressions_container(data_type const & data, order_type order) : data_(data), order_(order) expressions_tuple::expressions_tuple(data_type const & data, order_type order) : data_(data), order_(order)
{ } { }
array_expressions_container::array_expressions_container(array_expression const & s0) : order_(INDEPENDENT) expressions_tuple::expressions_tuple(array_expression const & s0) : order_(INDEPENDENT)
{ {
data_.push_back(create(s0)); data_.push_back(create(s0));
} }
array_expressions_container::array_expressions_container(order_type order, array_expression const & s0, array_expression const & s1) : order_(order) expressions_tuple::expressions_tuple(order_type order, array_expression const & s0, array_expression const & s1) : order_(order)
{ {
data_.push_back(create(s0)); data_.push_back(create(s0));
data_.push_back(create(s1)); data_.push_back(create(s1));
} }
array_expressions_container::data_type const & array_expressions_container::data() const expressions_tuple::data_type const & expressions_tuple::data() const
{ return data_; } { return data_; }
/**
 * Returns the context of the first stored expression.
 * NOTE(review): reads data_.front() unconditionally — presumably the tuple is never empty; confirm.
 */
cl::Context const & expressions_tuple::context() const
{
  return data_.front()->context();
}
array_expressions_container::order_type array_expressions_container::order() const expressions_tuple::order_type expressions_tuple::order() const
{ return order_; } { return order_; }
array_expression::node const & lhs_most(array_expression::container_type const & array, array_expression::node const & init) array_expression::node const & lhs_most(array_expression::container_type const & array, array_expression::node const & init)

View File

@@ -477,7 +477,7 @@ void export_array()
.def(bp::self OP bp::self)\ .def(bp::self OP bp::self)\
ADD_SCALAR_HANDLING(OP) ADD_SCALAR_HANDLING(OP)
bp::class_<atd::array_expressions_container> bp::class_<atd::expressions_tuple>
("array_expression_container", bp::init<atd::array_expression const &>()) ("array_expression_container", bp::init<atd::array_expression const &>())
; ;