Better control flow through options
This commit is contained in:
@@ -27,44 +27,39 @@ void bench(ad::numeric_type dtype)
|
|||||||
ad::tools::timer timer;
|
ad::tools::timer timer;
|
||||||
unsigned int dtsize = ad::size_of(dtype);
|
unsigned int dtsize = ad::size_of(dtype);
|
||||||
|
|
||||||
#define BENCHMARK(OP, PERF) \
|
#define BENCHMARK(OP, TIME, PERF) \
|
||||||
{\
|
{\
|
||||||
times.clear();\
|
|
||||||
total_time = 0;\
|
total_time = 0;\
|
||||||
OP;\
|
while(total_time < 1e-3){\
|
||||||
while(total_time < 1e-1){\
|
cl::Event event;\
|
||||||
timer.start(); \
|
|
||||||
OP;\
|
OP;\
|
||||||
times.push_back(timer.get());\
|
times.push_back(TIME);\
|
||||||
total_time += times.back();\
|
total_time += times.back();\
|
||||||
}\
|
}\
|
||||||
float tres = median(times);\
|
float tres = median(times);\
|
||||||
std::cout << " " << PERF << std::flush;\
|
std::cout << " " << tres << std::flush;\
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define CL_TIME
|
||||||
|
#define CL_SYNC queue.flush(); queue.finish()
|
||||||
/*---------*/
|
/*---------*/
|
||||||
/*--BLAS1--*/
|
/*--BLAS1--*/
|
||||||
/*---------*/
|
/*---------*/
|
||||||
std::cout << "#AXPY" << std::endl;
|
std::cout << "#AXPY" << std::endl;
|
||||||
for(auto N : BLAS1_N)
|
for(auto N : BLAS1_N)
|
||||||
{
|
{
|
||||||
|
|
||||||
std::cout << N;
|
std::cout << N;
|
||||||
/* ATIDLAS */
|
/* ATIDLAS */
|
||||||
ad::array x(N, dtype), y(N, dtype);
|
ad::array x(N, dtype), y(N, dtype);
|
||||||
cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0];
|
cl::CommandQueue& queue = ad::cl_ext::queues[x.context()][0];
|
||||||
ad::model & model = ad::get_model(queue, ad::VECTOR_AXPY_TYPE, dtype);
|
cl::Event event;
|
||||||
ad::array_expression E = ad::detail::assign(y, x + y);
|
y = ad::controller<atidlas::array_expression>(x + y, ad::execution_options_type(0, &event));
|
||||||
model.tune(E);
|
queue.flush(); queue.finish();
|
||||||
ad::operation_cache cache;
|
std::cout << " " << bandwidth(3*N, 1e-9*(event.getProfilingInfo<CL_PROFILING_COMMAND_END>() - event.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>()), dtsize) << std::flush;
|
||||||
model.execute(E, &cache);
|
// /* clAmdBlas */
|
||||||
queue.flush();
|
//#ifdef BENCH_CLAMDBLAS
|
||||||
queue.finish();
|
// BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, &event(), NULL); CL_SYNC, CL_TIME, bandwidth(3*N, tres, dtsize))
|
||||||
BENCHMARK(cache.enqueue(); queue.flush(); queue.finish();, bandwidth(3*N, tres, dtsize));
|
//#endif
|
||||||
/* clAmdBlas */
|
|
||||||
#ifdef BENCH_CLAMDBLAS
|
|
||||||
BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, NULL, NULL); queue.flush(); queue.finish();, bandwidth(3*N, tres, dtsize))
|
|
||||||
#endif
|
|
||||||
/* BLAS */
|
/* BLAS */
|
||||||
#ifdef BENCH_CBLAS
|
#ifdef BENCH_CBLAS
|
||||||
std::vector<float> cx(N), cy(N);
|
std::vector<float> cx(N), cy(N);
|
||||||
@@ -177,6 +172,8 @@ int main(int argc, char* argv[])
|
|||||||
clAmdBlasSetup();
|
clAmdBlasSetup();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
ad::cl_ext::queue_properties = CL_QUEUE_PROFILING_ENABLE;
|
||||||
|
|
||||||
int device_idx = 0;
|
int device_idx = 0;
|
||||||
ad::cl_ext::queues_type::data_type const & queues = ad::cl_ext::queues.data();
|
ad::cl_ext::queues_type::data_type const & queues = ad::cl_ext::queues.data();
|
||||||
|
|
||||||
|
@@ -2,6 +2,7 @@
|
|||||||
#define ATIDLAS_ARRAY_H_
|
#define ATIDLAS_ARRAY_H_
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <type_traits>
|
||||||
#include <CL/cl.hpp>
|
#include <CL/cl.hpp>
|
||||||
#include "atidlas/types.h"
|
#include "atidlas/types.h"
|
||||||
#include "atidlas/cl_ext/backend.h"
|
#include "atidlas/cl_ext/backend.h"
|
||||||
@@ -16,6 +17,8 @@ class scalar;
|
|||||||
class array: public array_base
|
class array: public array_base
|
||||||
{
|
{
|
||||||
friend array reshape(array const &, int_t, int_t);
|
friend array reshape(array const &, int_t, int_t);
|
||||||
|
template<class T>
|
||||||
|
struct is_array { enum{ value = std::is_same<T, array>::value || std::is_same<T, array_expression>::value}; };
|
||||||
public:
|
public:
|
||||||
//1D Constructors
|
//1D Constructors
|
||||||
array(int_t size1, numeric_type dtype, cl::Context context = cl_ext::default_context());
|
array(int_t size1, numeric_type dtype, cl::Context context = cl_ext::default_context());
|
||||||
@@ -51,8 +54,10 @@ public:
|
|||||||
//Numeric operators
|
//Numeric operators
|
||||||
array& operator=(array const &);
|
array& operator=(array const &);
|
||||||
array& operator=(array_expression const &);
|
array& operator=(array_expression const &);
|
||||||
|
template<class T>
|
||||||
template<class T> array & operator=(std::vector<T> const & rhs);
|
array& operator=(controller<T> const &);
|
||||||
|
template<class T>
|
||||||
|
array & operator=(std::vector<T> const & rhs);
|
||||||
|
|
||||||
array_expression operator-();
|
array_expression operator-();
|
||||||
array_expression operator!();
|
array_expression operator!();
|
||||||
|
@@ -163,7 +163,7 @@ public:
|
|||||||
std::vector<std::string> generate(unsigned int label, expressions_tuple const & expressions, cl::Device const & device);
|
std::vector<std::string> generate(unsigned int label, expressions_tuple const & expressions, cl::Device const & device);
|
||||||
virtual int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const = 0;
|
virtual int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const = 0;
|
||||||
virtual void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
|
virtual void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
|
||||||
unsigned int label, expressions_tuple const & expressions, operation_cache* cache = NULL) = 0;
|
unsigned int label, controller<expressions_tuple> const & expressions) = 0;
|
||||||
virtual std::shared_ptr<base> clone() const = 0;
|
virtual std::shared_ptr<base> clone() const = 0;
|
||||||
private:
|
private:
|
||||||
binding_policy_t binding_policy_;
|
binding_policy_t binding_policy_;
|
||||||
|
@@ -27,7 +27,7 @@ public:
|
|||||||
maxpy(parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
|
maxpy(parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
|
||||||
maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE);
|
maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE);
|
||||||
std::vector<int_t> input_sizes(expressions_tuple const & expressions);
|
std::vector<int_t> input_sizes(expressions_tuple const & expressions);
|
||||||
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, expressions_tuple const & expressions, operation_cache * cache = NULL);
|
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, controller<expressions_tuple> const &);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -41,14 +41,14 @@ private:
|
|||||||
void enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K,
|
void enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K,
|
||||||
array_infos const & A, array_infos const & B, array_infos const & C,
|
array_infos const & A, array_infos const & B, array_infos const & C,
|
||||||
value_scalar const & alpha, value_scalar const & beta,
|
value_scalar const & alpha, value_scalar const & beta,
|
||||||
std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, int id, operation_cache * cache);
|
std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, int id, execution_options_type const & options);
|
||||||
array_infos create_slice(array_infos & M, int_t s0_0, int_t s0_1, int_t s1_0, int_t s1_1, bool swap);
|
array_infos create_slice(array_infos & M, int_t s0_0, int_t s0_1, int_t s1_0, int_t s1_1, bool swap);
|
||||||
std::vector<int_t> infos(expressions_tuple const & expressions,
|
std::vector<int_t> infos(expressions_tuple const & expressions,
|
||||||
lhs_rhs_element & C, lhs_rhs_element & A, lhs_rhs_element & B);
|
lhs_rhs_element & C, lhs_rhs_element & A, lhs_rhs_element & B);
|
||||||
public:
|
public:
|
||||||
mproduct(mproduct::parameters_type const & parameters, char A_trans, char B_trans);
|
mproduct(mproduct::parameters_type const & parameters, char A_trans, char B_trans);
|
||||||
std::vector<int_t> input_sizes(expressions_tuple const & expressions);
|
std::vector<int_t> input_sizes(expressions_tuple const & expressions);
|
||||||
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, expressions_tuple const & expressions, operation_cache * cache = NULL);
|
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, controller<expressions_tuple> const &);
|
||||||
private:
|
private:
|
||||||
const char A_trans_;
|
const char A_trans_;
|
||||||
const char B_trans_;
|
const char B_trans_;
|
||||||
|
@@ -35,7 +35,7 @@ private:
|
|||||||
std::vector<std::string> generate_impl(unsigned int, expressions_tuple const &, std::vector<mapping_type> const &) const;
|
std::vector<std::string> generate_impl(unsigned int, expressions_tuple const &, std::vector<mapping_type> const &) const;
|
||||||
public:
|
public:
|
||||||
virtual std::vector<int_t> input_sizes(expressions_tuple const & expressions);
|
virtual std::vector<int_t> input_sizes(expressions_tuple const & expressions);
|
||||||
void enqueue(cl::CommandQueue & queue,std::vector<cl_ext::lazy_compiler> & programs,unsigned int label, expressions_tuple const & expressions, operation_cache * cache = NULL);
|
void enqueue(cl::CommandQueue & queue,std::vector<cl_ext::lazy_compiler> & programs,unsigned int label, controller<expressions_tuple> const &);
|
||||||
private:
|
private:
|
||||||
reduction_type reduction_type_;
|
reduction_type reduction_type_;
|
||||||
};
|
};
|
||||||
|
@@ -29,7 +29,7 @@ public:
|
|||||||
reduction(reduction::parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
|
reduction(reduction::parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
|
||||||
reduction(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE);
|
reduction(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE);
|
||||||
std::vector<int_t> input_sizes(expressions_tuple const & expressions);
|
std::vector<int_t> input_sizes(expressions_tuple const & expressions);
|
||||||
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, expressions_tuple const & expressions, operation_cache * cache = NULL);
|
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, controller<expressions_tuple> const &);
|
||||||
private:
|
private:
|
||||||
std::vector< cl::Buffer > tmp_;
|
std::vector< cl::Buffer > tmp_;
|
||||||
std::vector< cl::Buffer > tmpidx_;
|
std::vector< cl::Buffer > tmpidx_;
|
||||||
|
@@ -23,7 +23,7 @@ public:
|
|||||||
vaxpy(vaxpy::parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
|
vaxpy(vaxpy::parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
|
||||||
vaxpy(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
|
vaxpy(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
|
||||||
std::vector<int_t> input_sizes(expressions_tuple const & expressions);
|
std::vector<int_t> input_sizes(expressions_tuple const & expressions);
|
||||||
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, expressions_tuple const & expressions, operation_cache * cache = NULL);
|
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, controller<expressions_tuple> const &);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -17,29 +17,19 @@ namespace atidlas
|
|||||||
class model
|
class model
|
||||||
{
|
{
|
||||||
typedef std::vector< std::shared_ptr<base> > templates_container;
|
typedef std::vector< std::shared_ptr<base> > templates_container;
|
||||||
public:
|
|
||||||
struct runtime_options
|
|
||||||
{
|
|
||||||
runtime_options() : label(-1), recompile(false){}
|
|
||||||
runtime_options(std::string const & p) : program_name(p), label(-1), recompile(false){}
|
|
||||||
|
|
||||||
std::string program_name;
|
|
||||||
int label;
|
|
||||||
bool recompile;
|
|
||||||
};
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::string define_extension(std::string const & extensions, std::string const & ext);
|
std::string define_extension(std::string const & extensions, std::string const & ext);
|
||||||
inline void fill_program_name(char* program_name, expressions_tuple const & expressions, binding_policy_t binding_policy);
|
inline void fill_program_name(char* program_name, expressions_tuple const & expressions, binding_policy_t binding_policy);
|
||||||
std::vector<cl_ext::lazy_compiler>& init(expressions_tuple const & expressions, runtime_options const & opt = runtime_options());
|
std::vector<cl_ext::lazy_compiler>& init(controller<expressions_tuple> const &);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
model(predictors::random_forest const &, std::vector< std::shared_ptr<base> > const &, cl::CommandQueue &);
|
model(predictors::random_forest const &, std::vector< std::shared_ptr<base> > const &, cl::CommandQueue &);
|
||||||
model(std::vector< std::shared_ptr<base> > const &, cl::CommandQueue &);
|
model(std::vector< std::shared_ptr<base> > const &, cl::CommandQueue &);
|
||||||
model(base const &, cl::CommandQueue &);
|
model(base const &, cl::CommandQueue &);
|
||||||
|
|
||||||
void execute(expressions_tuple const &, operation_cache * cache = NULL, runtime_options const & opt = runtime_options());
|
void execute(controller<expressions_tuple> const &);
|
||||||
void tune(expressions_tuple const &);
|
void tune(controller<expressions_tuple> const &);
|
||||||
|
|
||||||
templates_container const & templates() const;
|
templates_container const & templates() const;
|
||||||
private:
|
private:
|
||||||
|
@@ -9,7 +9,7 @@ namespace atidlas
|
|||||||
{
|
{
|
||||||
|
|
||||||
/** @brief Executes a array_expression on the given queue for the given models map*/
|
/** @brief Executes a array_expression on the given queue for the given models map*/
|
||||||
void execute(array_expression &, model_map_t &, operation_cache * cache = NULL);
|
void execute(controller<array_expression> const & , model_map_t &);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -208,27 +208,83 @@ private:
|
|||||||
size4 shape_;
|
size4 shape_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class operation_cache
|
||||||
|
{
|
||||||
|
struct infos
|
||||||
|
{
|
||||||
|
cl::CommandQueue & queue;
|
||||||
|
cl::Kernel kernel;
|
||||||
|
cl::NDRange offset;
|
||||||
|
cl::NDRange global;
|
||||||
|
cl::NDRange local;
|
||||||
|
std::vector<cl::Event>* dependencies;
|
||||||
|
cl::Event* event;
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
void push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies, cl::Event* event)
|
||||||
|
{ l_.push_back({queue, kernel, offset, global, local, dependencies, event}); }
|
||||||
|
|
||||||
|
void enqueue()
|
||||||
|
{
|
||||||
|
for(infos & i : l_)
|
||||||
|
i.queue.enqueueNDRangeKernel(i.kernel, i.offset, i.global, i.local, i.dependencies, i.event);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::list<infos> l_;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct execution_options_type
|
||||||
|
{
|
||||||
|
execution_options_type(unsigned int _queue_id = 0, cl::Event* _event = NULL, operation_cache* _cache = NULL, std::vector<cl::Event>* _dependencies = NULL) : queue_id(_queue_id), event(_event), cache(_cache), dependencies(_dependencies){}
|
||||||
|
|
||||||
|
void enqueue_cache(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange offset, cl::NDRange global, cl::NDRange local) const
|
||||||
|
{
|
||||||
|
queue.enqueueNDRangeKernel(kernel, offset, global, local, dependencies, event);
|
||||||
|
if(cache)
|
||||||
|
cache->push_back(queue, kernel, cl::NullRange, global, local, dependencies, event);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int queue_id;
|
||||||
|
cl::Event* event;
|
||||||
|
operation_cache* cache;
|
||||||
|
std::vector<cl::Event>* dependencies;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Options for the dispatcher: holds a single `label`, which defaults to -1.
 * The constructor is intentionally implicit so a plain int can be passed
 * wherever these options are expected.
 */
struct dispatcher_options_type
{
  int label;

  dispatcher_options_type(int _label = -1)
    : label(_label)
  {}
};
|
||||||
|
|
||||||
|
/**
 * Options for program compilation: a program name (empty by default) and a
 * `recompile` flag (false by default).
 */
struct compilation_options_type
{
  std::string program_name;
  bool recompile;

  compilation_options_type(std::string const & _program_name = "", bool _recompile = false)
    : program_name(_program_name),
      recompile(_recompile)
  {}
};
|
||||||
|
|
||||||
template<class TYPE>
|
template<class TYPE>
|
||||||
class controller
|
class controller
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
controller(TYPE const & x, cl::Event* event = NULL, std::vector<cl::Event>* dependencies = NULL,
|
controller(TYPE const & x, execution_options_type const& execution_options = execution_options_type(),
|
||||||
cl::CommandQueue* queue = NULL, operation_cache* cache = NULL) : x_(x), event_(event), dependencies_(dependencies), queue_(queue), cache_(cache){}
|
dispatcher_options_type const & dispatcher_options = dispatcher_options_type(), compilation_options_type const & compilation_options = compilation_options_type())
|
||||||
|
: x_(x), execution_options_(execution_options), dispatcher_options_(dispatcher_options), compilation_options_(compilation_options){}
|
||||||
|
|
||||||
TYPE const & x() const { return x_; }
|
TYPE const & x() const { return x_; }
|
||||||
cl::Event* event() const { return event_; }
|
execution_options_type const & execution_options() const { return execution_options_; }
|
||||||
std::vector<cl::Event>* dependencies() const { return dependencies_; }
|
dispatcher_options_type const & dispatcher_options() const { return dispatcher_options_; }
|
||||||
cl::CommandQueue* queue() const { return queue_; }
|
compilation_options_type const & compilation_options() const { return compilation_options_; }
|
||||||
operation_cache* cache() const { return cache_; }
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TYPE const & x_;
|
TYPE const & x_;
|
||||||
cl::Event* event_;
|
execution_options_type execution_options_;
|
||||||
std::vector<cl::Event>* dependencies_;
|
dispatcher_options_type dispatcher_options_;
|
||||||
cl::CommandQueue* queue_;
|
compilation_options_type compilation_options_;
|
||||||
operation_cache* cache_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class expressions_tuple
|
class expressions_tuple
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
|
@@ -61,36 +61,6 @@ struct array_infos
|
|||||||
int_t ld;
|
int_t ld;
|
||||||
};
|
};
|
||||||
|
|
||||||
class operation_cache
|
|
||||||
{
|
|
||||||
struct infos
|
|
||||||
{
|
|
||||||
infos(cl::CommandQueue & q, cl::Kernel const & k, cl::NDRange const & off, cl::NDRange const & g, cl::NDRange const & l)
|
|
||||||
: queue(q), kernel(k), offset(off), grange(g), lrange(l) {}
|
|
||||||
|
|
||||||
cl::CommandQueue & queue;
|
|
||||||
cl::Kernel kernel;
|
|
||||||
cl::NDRange offset;
|
|
||||||
cl::NDRange grange;
|
|
||||||
cl::NDRange lrange;
|
|
||||||
};
|
|
||||||
|
|
||||||
public:
|
|
||||||
void push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & grange, cl::NDRange const & lrange)
|
|
||||||
{
|
|
||||||
l_.push_back(infos(queue, kernel, offset, grange, lrange));
|
|
||||||
}
|
|
||||||
|
|
||||||
void enqueue()
|
|
||||||
{
|
|
||||||
for(infos & elem : l_)
|
|
||||||
elem.queue.enqueueNDRangeKernel(elem.kernel, elem.offset, elem.grange, elem.lrange);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::list<infos> l_;
|
|
||||||
};
|
|
||||||
|
|
||||||
inline std::string numeric_type_to_string(numeric_type const & type)
|
inline std::string numeric_type_to_string(numeric_type const & type)
|
||||||
{
|
{
|
||||||
switch (type)
|
switch (type)
|
||||||
|
@@ -131,22 +131,17 @@ int_t array::dsize() const
|
|||||||
/*--- Assignment Operators ----*/
|
/*--- Assignment Operators ----*/
|
||||||
//---------------------------------------
|
//---------------------------------------
|
||||||
array & array::operator=(array const & rhs)
|
array & array::operator=(array const & rhs)
|
||||||
{
|
{ return *this = controller<array>(rhs); }
|
||||||
assert(dtype_ == rhs.dtype());
|
|
||||||
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), context_, dtype_, shape_);
|
|
||||||
cl::CommandQueue & queue = cl_ext::queues[context_][0];
|
|
||||||
model_map_t & mmap = atidlas::get_model_map(queue);
|
|
||||||
execute(expression, mmap);
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
array & array::operator=(array_expression const & rhs)
|
array & array::operator=(array_expression const & rhs)
|
||||||
|
{ return *this = controller<array_expression>(rhs); }
|
||||||
|
|
||||||
|
template<class TYPE>
|
||||||
|
array& array::operator=(controller<TYPE> const & c)
|
||||||
{
|
{
|
||||||
assert(dtype_ == rhs.dtype());
|
assert(dtype_ == c.x().dtype());
|
||||||
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), dtype_, shape_);
|
execute(controller<array_expression>(detail::assign(*this, c.x()), c.execution_options(), c.dispatcher_options(), c.compilation_options()),
|
||||||
cl::CommandQueue & queue = cl_ext::queues[context_][0];
|
atidlas::get_model_map(cl_ext::queues[context_][c.execution_options().queue_id]));
|
||||||
model_map_t & mmap = atidlas::get_model_map(queue);
|
|
||||||
execute(expression, mmap);
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -105,23 +105,22 @@ std::vector<int_t> maxpy::input_sizes(expressions_tuple const & expressions)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void maxpy::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
|
void maxpy::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
|
||||||
unsigned int label, expressions_tuple const & expressions, operation_cache * cache)
|
unsigned int label, controller<expressions_tuple> const & controller)
|
||||||
{
|
{
|
||||||
|
expressions_tuple const & expressions = controller.x();
|
||||||
char kname[10];
|
char kname[10];
|
||||||
fill_kernel_name(kname, label, "d");
|
fill_kernel_name(kname, label, "d");
|
||||||
cl::Program & program = programs[0].program();
|
cl::Program & program = programs[0].program();
|
||||||
cl::Kernel kernel(program, kname);
|
cl::Kernel kernel(program, kname);
|
||||||
cl::NDRange grange(p_.local_size_0*p_.num_groups_0, p_.local_size_1*p_.num_groups_1);
|
cl::NDRange global(p_.local_size_0*p_.num_groups_0, p_.local_size_1*p_.num_groups_1);
|
||||||
cl::NDRange lrange(p_.local_size_0, p_.local_size_1);
|
cl::NDRange local(p_.local_size_0, p_.local_size_1);
|
||||||
unsigned int current_arg = 0;
|
unsigned int current_arg = 0;
|
||||||
std::vector<int_t> MN = input_sizes(expressions);
|
std::vector<int_t> MN = input_sizes(expressions);
|
||||||
kernel.setArg(current_arg++, cl_uint(MN[0]));
|
kernel.setArg(current_arg++, cl_uint(MN[0]));
|
||||||
kernel.setArg(current_arg++, cl_uint(MN[1]));
|
kernel.setArg(current_arg++, cl_uint(MN[1]));
|
||||||
set_arguments(expressions, kernel, current_arg);
|
set_arguments(expressions, kernel, current_arg);
|
||||||
queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange);
|
|
||||||
|
|
||||||
if(cache)
|
controller.execution_options().enqueue_cache(queue, kernel, cl::NullRange, global, local);
|
||||||
cache->push_back(queue, kernel, cl::NullRange, grange, lrange);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template class base_impl<maxpy, maxpy_parameters>;
|
template class base_impl<maxpy, maxpy_parameters>;
|
||||||
|
@@ -568,7 +568,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
void mproduct::enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K,
|
void mproduct::enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K,
|
||||||
array_infos const & A, array_infos const & B, array_infos const & C,
|
array_infos const & A, array_infos const & B, array_infos const & C,
|
||||||
value_scalar const & alpha, value_scalar const & beta,
|
value_scalar const & alpha, value_scalar const & beta,
|
||||||
std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, int id, operation_cache * cache)
|
std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, int id, execution_options_type const & options)
|
||||||
{
|
{
|
||||||
if (A.shape1==0 || A.shape2==0 || B.shape1==0 || B.shape2==0 || C.shape1==0 || C.shape2==0)
|
if (A.shape1==0 || A.shape2==0 || B.shape1==0 || B.shape2==0 || C.shape1==0 || C.shape2==0)
|
||||||
return;
|
return;
|
||||||
@@ -578,8 +578,8 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
|
|
||||||
cl::Program & program = programs[id].program();
|
cl::Program & program = programs[id].program();
|
||||||
cl::Kernel kernel(program, kname);
|
cl::Kernel kernel(program, kname);
|
||||||
cl::NDRange lrange(p_.local_size_0, p_.local_size_1);
|
cl::NDRange local(p_.local_size_0, p_.local_size_1);
|
||||||
cl::NDRange grange = (id==1)?cl::NDRange(align(align(M,p_.mS)/p_.mS, p_.local_size_0), align(align(N,p_.nS)/p_.nS, p_.local_size_1)):
|
cl::NDRange global = (id==1)?cl::NDRange(align(align(M,p_.mS)/p_.mS, p_.local_size_0), align(align(N,p_.nS)/p_.nS, p_.local_size_1)):
|
||||||
cl::NDRange(M/p_.mS, N/p_.nS);
|
cl::NDRange(M/p_.mS, N/p_.nS);
|
||||||
|
|
||||||
unsigned int current_arg = 0;
|
unsigned int current_arg = 0;
|
||||||
@@ -595,10 +595,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
fun.set_arguments(B);
|
fun.set_arguments(B);
|
||||||
fun.set_arguments(beta.dtype(), beta.values());
|
fun.set_arguments(beta.dtype(), beta.values());
|
||||||
|
|
||||||
queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange);
|
options.enqueue_cache(queue,kernel, cl::NullRange, global, local);
|
||||||
|
|
||||||
if(cache)
|
|
||||||
cache->push_back(queue, kernel, cl::NullRange, grange, lrange);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
array_infos mproduct::create_slice(array_infos & M, int_t s0_0, int_t s0_1, int_t s1_0, int_t s1_1, bool swap)
|
array_infos mproduct::create_slice(array_infos & M, int_t s0_0, int_t s0_1, int_t s1_0, int_t s1_1, bool swap)
|
||||||
@@ -649,13 +646,17 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
return infos(expressions, d0, d1, d2);
|
return infos(expressions, d0, d1, d2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void mproduct::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, expressions_tuple const & expressions, operation_cache * cache)
|
void mproduct::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, controller<expressions_tuple> const & controller)
|
||||||
{
|
{
|
||||||
using namespace tools;
|
using namespace tools;
|
||||||
|
|
||||||
|
expressions_tuple const & expressions = controller.x();
|
||||||
|
|
||||||
lhs_rhs_element C, A, B;
|
lhs_rhs_element C, A, B;
|
||||||
std::vector<int_t> MNK = infos(expressions, C, A, B);
|
std::vector<int_t> MNK = infos(expressions, C, A, B);
|
||||||
|
|
||||||
|
execution_options_type const & options = controller.execution_options();
|
||||||
|
|
||||||
int_t M = MNK[0];
|
int_t M = MNK[0];
|
||||||
int_t N = MNK[1];
|
int_t N = MNK[1];
|
||||||
int_t K = MNK[2];
|
int_t K = MNK[2];
|
||||||
@@ -687,7 +688,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
{
|
{
|
||||||
enqueue_block(queue, M, N, K, create_slice(pA, 0, M, 0, K, swap_A),
|
enqueue_block(queue, M, N, K, create_slice(pA, 0, M, 0, K, swap_A),
|
||||||
create_slice(pB, 0, K, 0, N, swap_B),
|
create_slice(pB, 0, K, 0, N, swap_B),
|
||||||
create_slice(pC, 0, M, 0, N, false), *_1, *_0, programs, label, 1, cache);
|
create_slice(pC, 0, M, 0, N, false), *_1, *_0, programs, label, 1, options);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -695,17 +696,17 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
int_t lN = N / p_.nL * p_.nL;
|
int_t lN = N / p_.nL * p_.nL;
|
||||||
int_t lK = K / p_.kL * p_.kL;
|
int_t lK = K / p_.kL * p_.kL;
|
||||||
|
|
||||||
enqueue_block(queue, lM, lN, lK, create_slice(pA, 0, lM, 0, lK, swap_A), create_slice(pB, 0, lK, 0, lN, swap_B), create_slice(pC, 0, lM, 0, lN, false), *_1, *_0, programs, label, 0, cache);
|
enqueue_block(queue, lM, lN, lK, create_slice(pA, 0, lM, 0, lK, swap_A), create_slice(pB, 0, lK, 0, lN, swap_B), create_slice(pC, 0, lM, 0, lN, false), *_1, *_0, programs, label, 0, options);
|
||||||
enqueue_block(queue, lM, lN, K - lK, create_slice(pA, 0, lM, lK, K, swap_A), create_slice(pB, lK, K, 0, lN, swap_B), create_slice(pC, 0, lM, 0, lN, false), *_1, *_1, programs, label, 1, cache);
|
enqueue_block(queue, lM, lN, K - lK, create_slice(pA, 0, lM, lK, K, swap_A), create_slice(pB, lK, K, 0, lN, swap_B), create_slice(pC, 0, lM, 0, lN, false), *_1, *_1, programs, label, 1, options);
|
||||||
|
|
||||||
enqueue_block(queue, lM, N - lN, lK, create_slice(pA, 0, lM, 0, lK, swap_A), create_slice(pB, 0, lK, lN, N, swap_B), create_slice(pC, 0, lM, lN, N, false), *_1, *_0, programs, label, 1, cache);
|
enqueue_block(queue, lM, N - lN, lK, create_slice(pA, 0, lM, 0, lK, swap_A), create_slice(pB, 0, lK, lN, N, swap_B), create_slice(pC, 0, lM, lN, N, false), *_1, *_0, programs, label, 1, options);
|
||||||
enqueue_block(queue, lM, N - lN, K - lK, create_slice(pA, 0, lM, lK, K, swap_A), create_slice(pB, lK, K, lN, N, swap_B), create_slice(pC, 0, lM, lN, N, false), *_1, *_1, programs, label, 1, cache);
|
enqueue_block(queue, lM, N - lN, K - lK, create_slice(pA, 0, lM, lK, K, swap_A), create_slice(pB, lK, K, lN, N, swap_B), create_slice(pC, 0, lM, lN, N, false), *_1, *_1, programs, label, 1, options);
|
||||||
|
|
||||||
enqueue_block(queue, M - lM, lN, lK, create_slice(pA, lM, M, 0, lK, swap_A), create_slice(pB, 0, lK, 0, lN, swap_B), create_slice(pC, lM, M, 0, lN, false), *_1, *_0, programs, label, 1, cache);
|
enqueue_block(queue, M - lM, lN, lK, create_slice(pA, lM, M, 0, lK, swap_A), create_slice(pB, 0, lK, 0, lN, swap_B), create_slice(pC, lM, M, 0, lN, false), *_1, *_0, programs, label, 1, options);
|
||||||
enqueue_block(queue, M - lM, lN, K - lK, create_slice(pA, lM, M, lK, K, swap_A), create_slice(pB, lK, K, 0, lN, swap_B), create_slice(pC, lM, M, 0, lN, false), *_1, *_1, programs, label, 1, cache);
|
enqueue_block(queue, M - lM, lN, K - lK, create_slice(pA, lM, M, lK, K, swap_A), create_slice(pB, lK, K, 0, lN, swap_B), create_slice(pC, lM, M, 0, lN, false), *_1, *_1, programs, label, 1, options);
|
||||||
|
|
||||||
enqueue_block(queue, M - lM, N - lN, lK, create_slice(pA, lM, M, 0, lK, swap_A), create_slice(pB, 0, lK, lN, N, swap_B), create_slice(pC, lM, M, lN, N, false), *_1, *_0, programs, label, 1, cache);
|
enqueue_block(queue, M - lM, N - lN, lK, create_slice(pA, lM, M, 0, lK, swap_A), create_slice(pB, 0, lK, lN, N, swap_B), create_slice(pC, lM, M, lN, N, false), *_1, *_0, programs, label, 1, options);
|
||||||
enqueue_block(queue, M - lM, N - lN, K - lK, create_slice(pA, lM, M, lK, K, swap_A), create_slice(pB, lK, K, lN, N, swap_B), create_slice(pC, lM, M, lN, N, false), *_1, *_1, programs, label, 1, cache);
|
enqueue_block(queue, M - lM, N - lN, K - lK, create_slice(pA, lM, M, lK, K, swap_A), create_slice(pB, lK, K, lN, N, swap_B), create_slice(pC, lM, M, lN, N, false), *_1, *_1, programs, label, 1, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@@ -214,9 +214,10 @@ std::vector<int_t> mreduction::input_sizes(expressions_tuple const & expressions
|
|||||||
return tools::make_vector<int_t>() << MN.first << MN.second;
|
return tools::make_vector<int_t>() << MN.first << MN.second;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mreduction::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
|
void mreduction::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, controller<expressions_tuple> const & controller)
|
||||||
unsigned int label, expressions_tuple const & expressions, operation_cache * cache)
|
|
||||||
{
|
{
|
||||||
|
expressions_tuple const & expressions = controller.x();
|
||||||
|
|
||||||
char kname[10];
|
char kname[10];
|
||||||
fill_kernel_name(kname, label, "d");
|
fill_kernel_name(kname, label, "d");
|
||||||
std::vector<int_t> MN = input_sizes(expressions);
|
std::vector<int_t> MN = input_sizes(expressions);
|
||||||
@@ -229,18 +230,15 @@ void mreduction::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_comp
|
|||||||
cl::Kernel kernel(program, kname);
|
cl::Kernel kernel(program, kname);
|
||||||
|
|
||||||
//NDRange
|
//NDRange
|
||||||
cl::NDRange grange(p_.local_size_0*p_.num_groups_0, p_.local_size_1);
|
cl::NDRange global(p_.local_size_0*p_.num_groups_0, p_.local_size_1);
|
||||||
cl::NDRange lrange(p_.local_size_0, p_.local_size_1);
|
cl::NDRange local(p_.local_size_0, p_.local_size_1);
|
||||||
|
|
||||||
unsigned int current_arg = 0;
|
unsigned int current_arg = 0;
|
||||||
kernel.setArg(current_arg++, cl_uint(MN[0]));
|
kernel.setArg(current_arg++, cl_uint(MN[0]));
|
||||||
kernel.setArg(current_arg++, cl_uint(MN[1]));
|
kernel.setArg(current_arg++, cl_uint(MN[1]));
|
||||||
set_arguments(expressions, kernel, current_arg);
|
set_arguments(expressions, kernel, current_arg);
|
||||||
|
|
||||||
queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange);
|
controller.execution_options().enqueue_cache(queue, kernel, cl::NullRange, global, local);
|
||||||
|
|
||||||
if(cache)
|
|
||||||
cache->push_back(queue, kernel, cl::NullRange, grange, lrange);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mreduction_rows::mreduction_rows(mreduction_parameters const & parameters,
|
mreduction_rows::mreduction_rows(mreduction_parameters const & parameters,
|
||||||
|
@@ -280,9 +280,10 @@ std::vector<int_t> reduction::input_sizes(expressions_tuple const & expressions)
|
|||||||
return tools::make_vector<int_t>() << N;
|
return tools::make_vector<int_t>() << N;
|
||||||
}
|
}
|
||||||
|
|
||||||
void reduction::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
|
void reduction::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, controller<expressions_tuple> const & controller)
|
||||||
unsigned int label, expressions_tuple const & expressions, operation_cache * cache)
|
|
||||||
{
|
{
|
||||||
|
expressions_tuple const & expressions = controller.x();
|
||||||
|
|
||||||
//Preprocessing
|
//Preprocessing
|
||||||
int_t size = input_sizes(expressions)[0];
|
int_t size = input_sizes(expressions)[0];
|
||||||
std::vector<array_expression::node const *> reductions;
|
std::vector<array_expression::node const *> reductions;
|
||||||
@@ -307,8 +308,8 @@ void reduction::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compi
|
|||||||
cl::Kernel(program, fallback?kfallback[1]:kopt[1]) };
|
cl::Kernel(program, fallback?kfallback[1]:kopt[1]) };
|
||||||
|
|
||||||
//NDRange
|
//NDRange
|
||||||
cl::NDRange grange[2] = { cl::NDRange(p_.local_size_0*p_.num_groups), cl::NDRange(p_.local_size_0) };
|
cl::NDRange global[2] = { cl::NDRange(p_.local_size_0*p_.num_groups), cl::NDRange(p_.local_size_0) };
|
||||||
cl::NDRange lrange[2] = { cl::NDRange(p_.local_size_0), cl::NDRange(p_.local_size_0) };
|
cl::NDRange local[2] = { cl::NDRange(p_.local_size_0), cl::NDRange(p_.local_size_0) };
|
||||||
|
|
||||||
//Arguments
|
//Arguments
|
||||||
cl::Context context = expressions.context();
|
cl::Context context = expressions.context();
|
||||||
@@ -340,11 +341,7 @@ void reduction::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compi
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int k = 0; k < 2; k++)
|
for (unsigned int k = 0; k < 2; k++)
|
||||||
queue.enqueueNDRangeKernel(kernels[k], cl::NullRange, grange[k], lrange[k]);
|
controller.execution_options().enqueue_cache(queue, kernels[k], cl::NullRange, global[k], local[k]);
|
||||||
|
|
||||||
if(cache)
|
|
||||||
for (unsigned int k = 0; k < 2; k++)
|
|
||||||
cache->push_back(queue, kernels[k], cl::NullRange, grange[k], lrange[k]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template class base_impl<reduction, reduction_parameters>;
|
template class base_impl<reduction, reduction_parameters>;
|
||||||
|
@@ -108,9 +108,9 @@ std::vector<int_t> vaxpy::input_sizes(expressions_tuple const & expressions)
|
|||||||
return tools::make_vector<int_t>() << size;
|
return tools::make_vector<int_t>() << size;
|
||||||
}
|
}
|
||||||
|
|
||||||
void vaxpy::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
|
void vaxpy::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, controller<expressions_tuple> const & controller)
|
||||||
unsigned int label, expressions_tuple const & expressions, operation_cache * cache)
|
|
||||||
{
|
{
|
||||||
|
expressions_tuple const & expressions = controller.x();
|
||||||
//Size
|
//Size
|
||||||
int_t size = input_sizes(expressions)[0];
|
int_t size = input_sizes(expressions)[0];
|
||||||
//Kernel
|
//Kernel
|
||||||
@@ -128,16 +128,14 @@ void vaxpy::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler>
|
|||||||
cl::Kernel & kernel = it->second;
|
cl::Kernel & kernel = it->second;
|
||||||
|
|
||||||
//NDRange
|
//NDRange
|
||||||
cl::NDRange grange(p_.local_size_0*p_.num_groups);
|
cl::NDRange global(p_.local_size_0*p_.num_groups);
|
||||||
cl::NDRange lrange(p_.local_size_0);
|
cl::NDRange local(p_.local_size_0);
|
||||||
//Arguments
|
//Arguments
|
||||||
unsigned int current_arg = 0;
|
unsigned int current_arg = 0;
|
||||||
kernel.setArg(current_arg++, cl_uint(size));
|
kernel.setArg(current_arg++, cl_uint(size));
|
||||||
set_arguments(expressions, kernel, current_arg);
|
set_arguments(expressions, kernel, current_arg);
|
||||||
queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange);
|
|
||||||
|
|
||||||
if(cache)
|
controller.execution_options().enqueue_cache(queue, kernel, cl::NullRange, global, local);
|
||||||
cache->push_back(queue, kernel, cl::NullRange, grange, lrange);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -46,18 +46,19 @@ void model::fill_program_name(char* program_name, expressions_tuple const & expr
|
|||||||
delete binder;
|
delete binder;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<cl_ext::lazy_compiler>& model::init(expressions_tuple const & expressions, runtime_options const & opt)
|
std::vector<cl_ext::lazy_compiler>& model::init(controller<expressions_tuple> const & expressions)
|
||||||
{
|
{
|
||||||
cl::Context const & context = expressions.context();
|
cl::Context const & context = expressions.x().context();
|
||||||
std::string pname;
|
std::string pname;
|
||||||
|
compilation_options_type const & opt = expressions.compilation_options();
|
||||||
if(opt.program_name.empty())
|
if(opt.program_name.empty())
|
||||||
{
|
{
|
||||||
char program_name[256];
|
char program_name[256];
|
||||||
fill_program_name(program_name, expressions, BIND_TO_HANDLE);
|
fill_program_name(program_name, expressions.x(), BIND_TO_HANDLE);
|
||||||
pname = std::string(program_name);
|
pname = std::string(program_name);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
pname = opt.program_name;
|
pname = expressions.compilation_options().program_name;
|
||||||
std::vector<cl_ext::lazy_compiler> & to_init = lazy_programs_[context()][pname];
|
std::vector<cl_ext::lazy_compiler> & to_init = lazy_programs_[context()][pname];
|
||||||
if(to_init.empty())
|
if(to_init.empty())
|
||||||
{
|
{
|
||||||
@@ -72,7 +73,7 @@ std::vector<cl_ext::lazy_compiler>& model::init(expressions_tuple const & expres
|
|||||||
|
|
||||||
for(size_t i = 0 ; i < templates_.size() ; ++i)
|
for(size_t i = 0 ; i < templates_.size() ; ++i)
|
||||||
{
|
{
|
||||||
std::vector<std::string> cur = templates_[i]->generate(i, expressions, device);
|
std::vector<std::string> cur = templates_[i]->generate(i, expressions.x(), device);
|
||||||
for(size_t j = 0 ; j < cur.size() ; ++j){
|
for(size_t j = 0 ; j < cur.size() ; ++j){
|
||||||
to_init[j].add(cur[j]);
|
to_init[j].add(cur[j]);
|
||||||
}
|
}
|
||||||
@@ -91,19 +92,19 @@ model::model(std::vector< std::shared_ptr<base> > const & templates, cl::Command
|
|||||||
model::model(base const & tp, cl::CommandQueue & queue) : templates_(1,tp.clone()), queue_(queue)
|
model::model(base const & tp, cl::CommandQueue & queue) : templates_(1,tp.clone()), queue_(queue)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
void model::execute(expressions_tuple const & expressions, operation_cache *cache, runtime_options const & opt)
|
void model::execute(controller<expressions_tuple> const & expressions)
|
||||||
{
|
{
|
||||||
std::vector<cl_ext::lazy_compiler> & compilers = init(expressions, opt);
|
std::vector<cl_ext::lazy_compiler> & compilers = init(expressions);
|
||||||
|
|
||||||
//Prediction
|
//Prediction
|
||||||
int label = 0;
|
int label = 0;
|
||||||
if(opt.label>=0)
|
if(expressions.dispatcher_options().label>=0)
|
||||||
{
|
{
|
||||||
label = opt.label;
|
label = expressions.dispatcher_options().label;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::vector<int_t> x = templates_[0]->input_sizes(expressions);
|
std::vector<int_t> x = templates_[0]->input_sizes(expressions.x());
|
||||||
//The user tuned the model specifically for this input size
|
//The user tuned the model specifically for this input size
|
||||||
if(hardcoded_.find(x)!=hardcoded_.end())
|
if(hardcoded_.find(x)!=hardcoded_.end())
|
||||||
label = hardcoded_.at(x);
|
label = hardcoded_.at(x);
|
||||||
@@ -116,10 +117,10 @@ void model::execute(expressions_tuple const & expressions, operation_cache *cach
|
|||||||
}
|
}
|
||||||
|
|
||||||
//Execution
|
//Execution
|
||||||
return templates_[label]->enqueue(queue_, compilers, label, expressions, cache);
|
return templates_[label]->enqueue(queue_, compilers, label, expressions);
|
||||||
}
|
}
|
||||||
|
|
||||||
void model::tune(expressions_tuple const & expressions)
|
void model::tune(controller<expressions_tuple> const & expressions)
|
||||||
{
|
{
|
||||||
std::vector<cl_ext::lazy_compiler> & compilers = init(expressions);
|
std::vector<cl_ext::lazy_compiler> & compilers = init(expressions);
|
||||||
|
|
||||||
@@ -135,7 +136,7 @@ void model::tune(expressions_tuple const & expressions)
|
|||||||
}
|
}
|
||||||
|
|
||||||
//Fill the override
|
//Fill the override
|
||||||
std::vector<int_t> x = templates_[0]->input_sizes(expressions);
|
std::vector<int_t> x = templates_[0]->input_sizes(expressions.x());
|
||||||
hardcoded_[x] = std::distance(timings.begin(),std::min_element(timings.begin(), timings.end()));
|
hardcoded_[x] = std::distance(timings.begin(),std::min_element(timings.begin(), timings.end()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -147,11 +147,12 @@ namespace atidlas
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** @brief Executes a array_expression on the given models map*/
|
/** @brief Executes a array_expression on the given models map*/
|
||||||
void execute(atidlas::array_expression & array_expression, model_map_t & models, operation_cache * cache)
|
void execute(controller<array_expression> const & c, model_map_t & models)
|
||||||
{
|
{
|
||||||
cl::Context const & context = array_expression.context();
|
array_expression expression = c.x();
|
||||||
size_t rootidx = array_expression.root();
|
cl::Context const & context = expression.context();
|
||||||
array_expression::container_type & tree = const_cast<array_expression::container_type &>(array_expression.tree());
|
size_t rootidx = expression.root();
|
||||||
|
array_expression::container_type & tree = const_cast<array_expression::container_type &>(expression.tree());
|
||||||
array_expression::node root_save = tree[rootidx];
|
array_expression::node root_save = tree[rootidx];
|
||||||
|
|
||||||
//Todo: technically the datatype should be per temporary
|
//Todo: technically the datatype should be per temporary
|
||||||
@@ -207,7 +208,7 @@ namespace atidlas
|
|||||||
tree[rootidx].rhs.type_family = rit->second->type_family;
|
tree[rootidx].rhs.type_family = rit->second->type_family;
|
||||||
|
|
||||||
//Execute
|
//Execute
|
||||||
pmodel->execute(array_expression);
|
pmodel->execute(controller<expressions_tuple>(expression, c.execution_options(), c.dispatcher_options(), c.compilation_options()));
|
||||||
tree[rootidx] = root_save;
|
tree[rootidx] = root_save;
|
||||||
|
|
||||||
//Incorporates the temporary within the array_expression
|
//Incorporates the temporary within the array_expression
|
||||||
@@ -215,7 +216,7 @@ namespace atidlas
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*-----Compute final expression-----*/
|
/*-----Compute final expression-----*/
|
||||||
models[std::make_pair(final_type, dtype)]->execute(array_expression, cache);
|
models[std::make_pair(final_type, dtype)]->execute(controller<expressions_tuple>(expression, c.execution_options(), c.dispatcher_options(), c.compilation_options()));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -209,7 +209,8 @@ namespace detail
|
|||||||
else if(name=="uint64") return atd::ULONG_TYPE;
|
else if(name=="uint64") return atd::ULONG_TYPE;
|
||||||
else if(name=="float32") return atd::FLOAT_TYPE;
|
else if(name=="float32") return atd::FLOAT_TYPE;
|
||||||
else if(name=="float64") return atd::DOUBLE_TYPE;
|
else if(name=="float64") return atd::DOUBLE_TYPE;
|
||||||
else{
|
else
|
||||||
|
{
|
||||||
PyErr_SetString(PyExc_TypeError, "Data type not understood");
|
PyErr_SetString(PyExc_TypeError, "Data type not understood");
|
||||||
bp::throw_error_already_set();
|
bp::throw_error_already_set();
|
||||||
throw;
|
throw;
|
||||||
@@ -233,7 +234,8 @@ namespace detail
|
|||||||
else if(name=="mproduct_tn") return atd::MATRIX_PRODUCT_TN_TYPE;
|
else if(name=="mproduct_tn") return atd::MATRIX_PRODUCT_TN_TYPE;
|
||||||
else if(name=="mproduct_nt") return atd::MATRIX_PRODUCT_NT_TYPE;
|
else if(name=="mproduct_nt") return atd::MATRIX_PRODUCT_NT_TYPE;
|
||||||
else if(name=="mproduct_tt") return atd::MATRIX_PRODUCT_TT_TYPE;
|
else if(name=="mproduct_tt") return atd::MATRIX_PRODUCT_TT_TYPE;
|
||||||
else{
|
else
|
||||||
|
{
|
||||||
PyErr_SetString(PyExc_TypeError, "Template type not understood");
|
PyErr_SetString(PyExc_TypeError, "Template type not understood");
|
||||||
bp::throw_error_already_set();
|
bp::throw_error_already_set();
|
||||||
throw;
|
throw;
|
||||||
|
Reference in New Issue
Block a user