Now using a list of events instead of a single one

This commit is contained in:
Philippe Tillet
2015-02-08 00:56:24 -05:00
parent b768e913c9
commit 9c68704f09
7 changed files with 156 additions and 137 deletions

View File

@@ -22,8 +22,25 @@ template<class T>
void bench(ad::numeric_type dtype) void bench(ad::numeric_type dtype)
{ {
unsigned int dtsize = ad::size_of(dtype); unsigned int dtsize = ad::size_of(dtype);
cl::CommandQueue & queue = ad::cl_ext::queues[ad::cl_ext::default_context()][0];
#define BENCHMARK_OPENCL(OP, PERF) \ #define BENCHMARK_ATIDLAS(OP, PERF) \
{\
std::vector<long> times;\
double total_time = 0;\
while(total_time*1e-9 < 1e-1){\
std::list<cl::Event> events;\
OP;\
queue.finish();\
times.push_back(std::accumulate(events.begin(), events.end(), 0, \
[](unsigned long sum, cl::Event const & e){ return sum + e.getProfilingInfo<CL_PROFILING_COMMAND_END>() - e.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>();}));\
total_time+=times.back();\
}\
double t = median(times);\
std::cout << " " << PERF << std::flush;\
}
#define BENCHMARK_CLAMDBLAS(OP, PERF) \
{\ {\
std::vector<long> times;\ std::vector<long> times;\
double total_time = 0;\ double total_time = 0;\
@@ -69,41 +86,40 @@ void bench(ad::numeric_type dtype)
std::cout << " " << PERF << std::flush;\ std::cout << " " << PERF << std::flush;\
} }
// /*---------*/ /*---------*/
// /*--BLAS1--*/ /*--BLAS1--*/
// /*---------*/ /*---------*/
// std::cout << "#AXPY" << std::endl; std::cout << "#AXPY" << std::endl;
// for(int_t N : create_log_range(1e3, 2e7, 50, 64)) for(int_t N : create_log_range(1e3, 2e7, 50, 64))
// { {
// std::cout << N; std::cout << N;
// ad::array x(N, dtype), y(N, dtype); ad::array x(N, dtype), y(N, dtype);
// cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0]; /* ATIDLAS */
// /* ATIDLAS */ y = x + y; queue.flush(); queue.finish();
// y = x + y; queue.flush(); queue.finish(); BENCHMARK_ATIDLAS(y = ad::controller<atidlas::array_expression>(x + y, ad::execution_options_type(0, &events)), 3*N*dtsize/t)
// BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(x + y, ad::execution_options_type(0, &event)), 3*N*dtsize/t) /* clAmdBlas */
// /* clAmdBlas */ #ifdef BENCH_CLAMDBLAS
//#ifdef BENCH_CLAMDBLAS BENCHMARK_CLAMDBLAS(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, NULL, &event()), 3*N*dtsize/t)
// BENCHMARK_OPENCL(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, NULL, &event()), 3*N*dtsize/t) #endif
//#endif /* BLAS */
// /* BLAS */ #ifdef BENCH_CBLAS
//#ifdef BENCH_CBLAS std::vector<float> cx(N), cy(N);
// std::vector<float> cx(N), cy(N); ad::copy(x, cx);
// ad::copy(x, cx); ad::copy(y, cy);
// ad::copy(y, cy); BENCHMARK_HOST(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
// BENCHMARK_HOST(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t); #endif
//#endif /* CuBLAS */
// /* CuBLAS */ #ifdef BENCH_CUBLAS
//#ifdef BENCH_CUBLAS T *cux, *cuy;
// T *cux, *cuy; cudaMalloc((void**) &cux, N * sizeof(T));
// cudaMalloc((void**) &cux, N * sizeof(T)); cudaMalloc((void**) &cuy, N * sizeof(T));
// cudaMalloc((void**) &cuy, N * sizeof(T)); BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
// BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t) cudaFree(cux);
// cudaFree(cux); cudaFree(cuy);
// cudaFree(cuy); #endif
//#endif std::cout << std::endl;
// std::cout << std::endl; }
// } std::cout << "\n\n" << std::flush;
// std::cout << "\n\n" << std::flush;
// std::cout << "#DOT" << std::endl; // std::cout << "#DOT" << std::endl;
// for(int_t N : create_log_range(1e3, 2e7, 50, 64)) // for(int_t N : create_log_range(1e3, 2e7, 50, 64))
@@ -111,7 +127,6 @@ void bench(ad::numeric_type dtype)
// std::cout << N; // std::cout << N;
// /* ATIDLAS */ // /* ATIDLAS */
// ad::array x(N, dtype), y(N, dtype); // ad::array x(N, dtype), y(N, dtype);
// cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0];
// ad::array scratch(N, dtype); // ad::array scratch(N, dtype);
// ad::scalar s(dtype); // ad::scalar s(dtype);
// s = dot(x,y); queue.flush(); queue.finish(); // s = dot(x,y); queue.flush(); queue.finish();
@@ -140,43 +155,42 @@ void bench(ad::numeric_type dtype)
// } // }
// std::cout << "\n\n" << std::flush; // std::cout << "\n\n" << std::flush;
/*---------*/ // /*---------*/
/*--BLAS2--*/ // /*--BLAS2--*/
/*---------*/ // /*---------*/
//T-layout // //T-layout
std::cout << "#GEMV-T" << std::endl; // std::cout << "#GEMV-T" << std::endl;
for(int_t N: std::vector<int>{64}) // for(int_t N: std::vector<int>{64})
for(int_t M: create_full_range(128, 10000, 64)) // for(int_t M: create_full_range(128, 10000, 64))
{ // {
std::cout << M << "," << N; // std::cout << M << "," << N;
/* ATIDLAS */ // /* ATIDLAS */
ad::array A(N, M, dtype), y(M, dtype), x(N, dtype); // ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0]; // y = dot(trans(A),x); queue.flush(); queue.finish();
y = dot(trans(A),x); queue.flush(); queue.finish(); // BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(dot(trans(A),x), ad::execution_options_type(0, &event)),(M*N + M + N)*dtsize/t);
BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(dot(trans(A),x), ad::execution_options_type(0, &event)),(M*N + M + N)*dtsize/t); // #ifdef BENCH_CLAMDBLAS
#ifdef BENCH_CLAMDBLAS // BENCHMARK_OPENCL(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &queue(),0, NULL, &event()), (M*N + M + N)*dtsize/t)
BENCHMARK_OPENCL(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &queue(),0, NULL, &event()), (M*N + M + N)*dtsize/t) // #endif
#endif // #ifdef BENCH_CBLAS
#ifdef BENCH_CBLAS // std::vector<float> cA(N*M), cx(N), cy(M);
std::vector<float> cA(N*M), cx(N), cy(M); // ad::copy(x, cx);
ad::copy(x, cx); // ad::copy(y, cy);
ad::copy(y, cy); // ad::copy(A, cA);
ad::copy(A, cA); // BENCHMARK_HOST(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t);
BENCHMARK_HOST(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t); // #endif
#endif // #ifdef BENCH_CUBLAS
#ifdef BENCH_CUBLAS // T *cuA, *cux, *cuy;
T *cuA, *cux, *cuy; // cudaMalloc((void**) &cuA, N * M * sizeof(T));
cudaMalloc((void**) &cuA, N * M * sizeof(T)); // cudaMalloc((void**) &cux, N * sizeof(T));
cudaMalloc((void**) &cux, N * sizeof(T)); // cudaMalloc((void**) &cuy, M * sizeof(T));
cudaMalloc((void**) &cuy, M * sizeof(T)); // BENCHMARK_CUDA(cublasSgemv(cublasTrans, N, M, 1, cuA, N, cux, 1, 0, cuy, 1), (M*N + M + N)*dtsize/t)
BENCHMARK_CUDA(cublasSgemv(cublasTrans, N, M, 1, cuA, N, cux, 1, 0, cuy, 1), (M*N + M + N)*dtsize/t) // cudaFree(cuA);
cudaFree(cuA); // cudaFree(cux);
cudaFree(cux); // cudaFree(cuy);
cudaFree(cuy); // #endif
#endif // std::cout << std::endl;
std::cout << std::endl; // }
} // std::cout << "\n\n" << std::flush;
std::cout << "\n\n" << std::flush;
//// /*---------*/ //// /*---------*/
//// /*--BLAS3--*/ //// /*--BLAS3--*/

View File

@@ -182,12 +182,12 @@ public:
public: public:
template<class LT, class RT> template<class LT, class RT>
array_expression(LT const & lhs, RT const & rhs, op_element const & op, cl::Context const & ctx, numeric_type const & dtype, size4 const & shape); array_expression(LT const & lhs, RT const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
template<class RT> template<class RT>
array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape); array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
template<class LT> template<class LT>
array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape); array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape); array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
size4 shape() const; size4 shape() const;
array_expression& reshape(int_t size1, int_t size2=1); array_expression& reshape(int_t size1, int_t size2=1);
@@ -218,17 +218,18 @@ class operation_cache
cl::NDRange global; cl::NDRange global;
cl::NDRange local; cl::NDRange local;
std::vector<cl::Event>* dependencies; std::vector<cl::Event>* dependencies;
cl::Event* event;
}; };
public: public:
void push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies, cl::Event* event) void push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies)
{ l_.push_back({queue, kernel, offset, global, local, dependencies, event}); } { l_.push_back({queue, kernel, offset, global, local, dependencies}); }
void enqueue() void enqueue(std::list<cl::Event>* events = NULL)
{ {
for(infos & i : l_) for(infos & i : l_){
i.queue.enqueueNDRangeKernel(i.kernel, i.offset, i.global, i.local, i.dependencies, i.event); events->push_back(cl::Event());
i.queue.enqueueNDRangeKernel(i.kernel, i.offset, i.global, i.local, i.dependencies, &events->back());
}
} }
private: private:
@@ -237,17 +238,22 @@ private:
struct execution_options_type struct execution_options_type
{ {
execution_options_type(unsigned int _queue_id = 0, cl::Event* _event = NULL, operation_cache* _cache = NULL, std::vector<cl::Event>* _dependencies = NULL) : queue_id(_queue_id), event(_event), cache(_cache), dependencies(_dependencies){} execution_options_type(unsigned int _queue_id = 0, std::list<cl::Event>* _events = NULL, operation_cache* _cache = NULL, std::vector<cl::Event>* _dependencies = NULL) : queue_id(_queue_id), events(_events), cache(_cache), dependencies(_dependencies){}
void enqueue_cache(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange offset, cl::NDRange global, cl::NDRange local) const void enqueue_cache(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange offset, cl::NDRange global, cl::NDRange local) const
{ {
cl::Event* event = NULL;
if(events){
events->push_back(cl::Event());
event = &events->back();
}
queue.enqueueNDRangeKernel(kernel, offset, global, local, dependencies, event); queue.enqueueNDRangeKernel(kernel, offset, global, local, dependencies, event);
if(cache) if(cache)
cache->push_back(queue, kernel, cl::NullRange, global, local, dependencies, event); cache->push_back(queue, kernel, cl::NullRange, global, local, dependencies);
} }
unsigned int queue_id; unsigned int queue_id;
cl::Event* event; std::list<cl::Event>* events;
operation_cache* cache; operation_cache* cache;
std::vector<cl::Event>* dependencies; std::vector<cl::Event>* dependencies;
}; };

View File

@@ -139,7 +139,8 @@ template<class TYPE>
array& array::operator=(controller<TYPE> const & c) array& array::operator=(controller<TYPE> const & c)
{ {
assert(dtype_ == c.x().dtype()); assert(dtype_ == c.x().dtype());
execute(controller<array_expression>(detail::assign(*this, c.x()), c.execution_options(), c.dispatcher_options(), c.compilation_options()), array_expression expression(*this, c.x(), op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), context_, dtype_, shape_);
execute(controller<array_expression>(expression, c.execution_options(), c.dispatcher_options(), c.compilation_options()),
atidlas::get_model_map(cl_ext::queues[context_][c.execution_options().queue_id])); atidlas::get_model_map(cl_ext::queues[context_][c.execution_options().queue_id]));
return *this; return *this;
} }
@@ -180,7 +181,7 @@ array & array::operator+=(array const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype_, shape_); } { return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype_, shape_); }
array & array::operator+=(array_expression const & rhs) array & array::operator+=(array_expression const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), dtype_, shape_); } { return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), rhs.context(), dtype_, shape_); }
//---- //----
array & array::operator-=(value_scalar const & rhs) array & array::operator-=(value_scalar const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); } { return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
@@ -189,7 +190,7 @@ array & array::operator-=(array const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); } { return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
array & array::operator-=(array_expression const & rhs) array & array::operator-=(array_expression const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), dtype_, shape_); } { return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), rhs.context(), dtype_, shape_); }
//---- //----
array & array::operator*=(value_scalar const & rhs) array & array::operator*=(value_scalar const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); } { return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); }
@@ -198,7 +199,7 @@ array & array::operator*=(array const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); } { return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); }
array & array::operator*=(array_expression const & rhs) array & array::operator*=(array_expression const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), dtype_, shape_); } { return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), rhs.context(), dtype_, shape_); }
//---- //----
array & array::operator/=(value_scalar const & rhs) array & array::operator/=(value_scalar const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); } { return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); }
@@ -207,7 +208,7 @@ array & array::operator/=(array const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); } { return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); }
array & array::operator/=(array_expression const & rhs) array & array::operator/=(array_expression const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), dtype_, shape_); } { return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), rhs.context(), dtype_, shape_); }
array_expression array::T() const array_expression array::T() const
{ return atidlas::trans(*this) ;} { return atidlas::trans(*this) ;}
@@ -394,28 +395,28 @@ bool check_elementwise(U const & u, V const & v)
#define DEFINE_ELEMENT_BINARY_OPERATOR(OP, OPNAME, DTYPE) \ #define DEFINE_ELEMENT_BINARY_OPERATOR(OP, OPNAME, DTYPE) \
array_expression OPNAME (array_expression const & x, array_expression const & y) \ array_expression OPNAME (array_expression const & x, array_expression const & y) \
{ assert(check_elementwise(x, y));\ { assert(check_elementwise(x, y));\
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, elementwise_size(x, y)); } \ return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); } \
\ \
array_expression OPNAME (array const & x, array_expression const & y) \ array_expression OPNAME (array const & x, array_expression const & y) \
{ assert(check_elementwise(x, y));\ { assert(check_elementwise(x, y));\
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, elementwise_size(x, y)); } \ return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); } \
\ \
array_expression OPNAME (array_expression const & x, array const & y) \ array_expression OPNAME (array_expression const & x, array const & y) \
{ assert(check_elementwise(x, y));\ { assert(check_elementwise(x, y));\
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, elementwise_size(x, y)); } \ return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); } \
\ \
array_expression OPNAME (array const & x, array const & y) \ array_expression OPNAME (array const & x, array const & y) \
{ assert(check_elementwise(x, y));\ { assert(check_elementwise(x, y));\
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); }\ return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); }\
\ \
array_expression OPNAME (array_expression const & x, value_scalar const & y) \ array_expression OPNAME (array_expression const & x, value_scalar const & y) \
{ return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, x.shape()); } \ { return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); } \
\ \
array_expression OPNAME (array const & x, value_scalar const & y) \ array_expression OPNAME (array const & x, value_scalar const & y) \
{ return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); }\ { return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); }\
\ \
array_expression OPNAME (value_scalar const & y, array_expression const & x) \ array_expression OPNAME (value_scalar const & y, array_expression const & x) \
{ return array_expression(y, x, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, x.shape()); } \ { return array_expression(y, x, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); } \
\ \
array_expression OPNAME (value_scalar const & y, array const & x) \ array_expression OPNAME (value_scalar const & y, array const & x) \
{ return array_expression(y, x, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); } { return array_expression(y, x, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); }
@@ -458,7 +459,7 @@ array_expression OPNAME (array const & x) \
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.context(), x.dtype(), x.shape()); }\ { return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.context(), x.dtype(), x.shape()); }\
\ \
array_expression OPNAME (array_expression const & x) \ array_expression OPNAME (array_expression const & x) \
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.dtype(), x.shape()); } { return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.context(), x.dtype(), x.shape()); }
DEFINE_ELEMENT_UNARY_OPERATOR((x.dtype()==FLOAT_TYPE || x.dtype()==DOUBLE_TYPE)?OPERATOR_FABS_TYPE:OPERATOR_ABS_TYPE, abs) DEFINE_ELEMENT_UNARY_OPERATOR((x.dtype()==FLOAT_TYPE || x.dtype()==DOUBLE_TYPE)?OPERATOR_FABS_TYPE:OPERATOR_ABS_TYPE, abs)
DEFINE_ELEMENT_UNARY_OPERATOR(OPERATOR_ACOS_TYPE, acos) DEFINE_ELEMENT_UNARY_OPERATOR(OPERATOR_ACOS_TYPE, acos)
@@ -506,7 +507,7 @@ array_expression cast(array const & x, numeric_type dtype)
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, casted(dtype)), x.context(), dtype, x.shape()); } { return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, casted(dtype)), x.context(), dtype, x.shape()); }
array_expression cast(array_expression const & x, numeric_type dtype) array_expression cast(array_expression const & x, numeric_type dtype)
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, casted(dtype)), dtype, x.shape()); } { return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, casted(dtype)), x.context(), dtype, x.shape()); }
atidlas::array_expression eye(std::size_t M, std::size_t N, atidlas::numeric_type dtype, cl::Context ctx) atidlas::array_expression eye(std::size_t M, std::size_t N, atidlas::numeric_type dtype, cl::Context ctx)
{ return array_expression(value_scalar(1), value_scalar(0), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_VDIAG_TYPE), ctx, dtype, size4(M, N)); } { return array_expression(value_scalar(1), value_scalar(0), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_VDIAG_TYPE), ctx, dtype, size4(M, N)); }
@@ -524,7 +525,7 @@ array_expression trans(array const & x) \
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), flip(x.shape())); }\ { return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), flip(x.shape())); }\
\ \
array_expression trans(array_expression const & x) \ array_expression trans(array_expression const & x) \
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.dtype(), flip(x.shape())); } { return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), flip(x.shape())); }
array_expression repmat(array const & A, int_t const & rep1, int_t const & rep2) array_expression repmat(array const & A, int_t const & rep1, int_t const & rep2)
{ {
@@ -543,7 +544,7 @@ array_expression repmat(array_expression const & A, int_t const & rep1, int_t co
infos.rep2 = rep2; infos.rep2 = rep2;
infos.sub1 = A.shape()._1; infos.sub1 = A.shape()._1;
infos.sub2 = A.shape()._2; infos.sub2 = A.shape()._2;
return array_expression(A, infos, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_REPEAT_TYPE), A.dtype(), size4(infos.rep1*infos.sub1, infos.rep2*infos.sub2)); return array_expression(A, infos, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_REPEAT_TYPE), A.context(), A.dtype(), size4(infos.rep1*infos.sub1, infos.rep2*infos.sub2));
} }
////--------------------------------------- ////---------------------------------------
@@ -568,11 +569,11 @@ array_expression OPNAME(array_expression const & x, int_t axis)\
if(axis < -1 || axis > x.nshape())\ if(axis < -1 || axis > x.nshape())\
throw std::out_of_range("The axis entry is out of bounds");\ throw std::out_of_range("The axis entry is out of bounds");\
if(axis==-1)\ if(axis==-1)\
return array_expression(x, invalid_node(), op_element(OPERATOR_VECTOR_REDUCTION_TYPE_FAMILY, OP), x.dtype(), size4(1));\ return array_expression(x, invalid_node(), op_element(OPERATOR_VECTOR_REDUCTION_TYPE_FAMILY, OP), x.context(), x.dtype(), size4(1));\
else if(axis==0)\ else if(axis==0)\
return array_expression(x, invalid_node(), op_element(OPERATOR_ROWS_REDUCTION_TYPE_FAMILY, OP), x.dtype(), size4(x.shape()._1));\ return array_expression(x, invalid_node(), op_element(OPERATOR_ROWS_REDUCTION_TYPE_FAMILY, OP), x.context(), x.dtype(), size4(x.shape()._1));\
else\ else\
return array_expression(x, invalid_node(), op_element(OPERATOR_COLUMNS_REDUCTION_TYPE_FAMILY, OP), x.dtype(), size4(x.shape()._2));\ return array_expression(x, invalid_node(), op_element(OPERATOR_COLUMNS_REDUCTION_TYPE_FAMILY, OP), x.context(), x.dtype(), size4(x.shape()._2));\
} }
DEFINE_REDUCTION(OPERATOR_ADD_TYPE, sum) DEFINE_REDUCTION(OPERATOR_ADD_TYPE, sum)
@@ -604,7 +605,7 @@ namespace detail
shape._1 = A.shape()._2; shape._1 = A.shape()._2;
} }
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.dtype(), shape); array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.context(), A.dtype(), shape);
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]); array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
if(A_trans) res_root.lhs = A_root.lhs; if(A_trans) res_root.lhs = A_root.lhs;
return res; return res;
@@ -621,7 +622,7 @@ namespace detail
type = OPERATOR_MATRIX_PRODUCT_NT_TYPE; type = OPERATOR_MATRIX_PRODUCT_NT_TYPE;
shape._2 = B.shape()._1; shape._2 = B.shape()._1;
} }
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.dtype(), shape); array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.context(), A.dtype(), shape);
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]); array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
if(B_trans) res_root.rhs = B_root.lhs; if(B_trans) res_root.rhs = B_root.lhs;
return res; return res;
@@ -643,7 +644,7 @@ namespace detail
else if(!A_trans && B_trans) type = OPERATOR_MATRIX_PRODUCT_NT_TYPE; else if(!A_trans && B_trans) type = OPERATOR_MATRIX_PRODUCT_NT_TYPE;
else type = OPERATOR_MATRIX_PRODUCT_NN_TYPE; else type = OPERATOR_MATRIX_PRODUCT_NN_TYPE;
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.dtype(), shape); array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.context(), A.dtype(), shape);
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]); array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
if(A_trans) res_root.lhs = A_root.lhs; if(A_trans) res_root.lhs = A_root.lhs;
if(B_trans) res_root.rhs = B_root.lhs; if(B_trans) res_root.rhs = B_root.lhs;
@@ -667,7 +668,7 @@ namespace detail
bool A_trans = A_root.op.type==OPERATOR_TRANS_TYPE; bool A_trans = A_root.op.type==OPERATOR_TRANS_TYPE;
if(A_trans) if(A_trans)
{ {
array_expression tmp(A, repmat(x, 1, M), op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ELEMENT_PROD_TYPE), A.dtype(), size4(N, M)); array_expression tmp(A, repmat(x, 1, M), op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ELEMENT_PROD_TYPE), A.context(), A.dtype(), size4(N, M));
//Remove trans //Remove trans
tmp.tree()[tmp.root()].lhs = A.tree()[A.root()].lhs; tmp.tree()[tmp.root()].lhs = A.tree()[A.root()].lhs;
return sum(tmp, 1); return sum(tmp, 1);

View File

@@ -75,8 +75,8 @@ array_expression::array_expression(LT const & lhs, RT const & rhs, op_element co
} }
template<class RT> template<class RT>
array_expression::array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape) : array_expression::array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape) :
tree_(lhs.tree_.size() + 1), root_(tree_.size()-1), context_(lhs.context_), dtype_(dtype), shape_(shape) tree_(lhs.tree_.size() + 1), root_(tree_.size()-1), context_(context), dtype_(dtype), shape_(shape)
{ {
std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin()); std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin());
fill(tree_[root_].lhs, lhs.root_); fill(tree_[root_].lhs, lhs.root_);
@@ -85,8 +85,8 @@ array_expression::array_expression(array_expression const & lhs, RT const & rhs,
} }
template<class LT> template<class LT>
array_expression::array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape) : array_expression::array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape) :
tree_(rhs.tree_.size() + 1), root_(tree_.size() - 1), context_(rhs.context_), dtype_(dtype), shape_(shape) tree_(rhs.tree_.size() + 1), root_(tree_.size() - 1), context_(context), dtype_(dtype), shape_(shape)
{ {
std::copy(rhs.tree_.begin(), rhs.tree_.end(), tree_.begin()); std::copy(rhs.tree_.begin(), rhs.tree_.end(), tree_.begin());
fill(tree_[root_].lhs, lhs); fill(tree_[root_].lhs, lhs);
@@ -94,8 +94,8 @@ array_expression::array_expression(LT const & lhs, array_expression const & rhs,
fill(tree_[root_].rhs, rhs.root_); fill(tree_[root_].rhs, rhs.root_);
} }
array_expression::array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape): array_expression::array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape):
tree_(lhs.tree_.size() + rhs.tree_.size() + 1), root_(tree_.size()-1), context_(lhs.context_), dtype_(dtype), shape_(shape) tree_(lhs.tree_.size() + rhs.tree_.size() + 1), root_(tree_.size()-1), context_(context), dtype_(dtype), shape_(shape)
{ {
std::size_t lsize = lhs.tree_.size(); std::size_t lsize = lhs.tree_.size();
std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin()); std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin());
@@ -110,15 +110,15 @@ array_expression::array_expression(array_expression const & lhs, array_expressio
root_ = tree_.size() - 1; root_ = tree_.size() - 1;
} }
template array_expression::array_expression(array_expression const &, value_scalar const &, op_element const &, numeric_type const &, size4 const &); template array_expression::array_expression(array_expression const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array_expression const &, invalid_node const &, op_element const &, numeric_type const &, size4 const &); template array_expression::array_expression(array_expression const &, invalid_node const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array_expression const &, array const &, op_element const &, numeric_type const &, size4 const &); template array_expression::array_expression(array_expression const &, array const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array_expression const &, repeat_infos const &, op_element const &, numeric_type const &, size4 const &); template array_expression::array_expression(array_expression const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(value_scalar const &, array_expression const &, op_element const &, numeric_type const &, size4 const &); template array_expression::array_expression(value_scalar const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(invalid_node const &, array_expression const &, op_element const &, numeric_type const &, size4 const &); template array_expression::array_expression(invalid_node const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array const &, array_expression const &, op_element const &, numeric_type const &, size4 const &); template array_expression::array_expression(array const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(repeat_infos const &, array_expression const &, op_element const &, numeric_type const &, size4 const &); template array_expression::array_expression(repeat_infos const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(value_scalar const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &); template array_expression::array_expression(value_scalar const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(invalid_node const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &); template array_expression::array_expression(invalid_node const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
@@ -140,6 +140,8 @@ template array_expression::array_expression(invalid_node const &, repeat_infos c
template array_expression::array_expression(array const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &); template array_expression::array_expression(array const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(repeat_infos const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &); template array_expression::array_expression(repeat_infos const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
array_expression::container_type & array_expression::tree() array_expression::container_type & array_expression::tree()
{ return tree_; } { return tree_; }
@@ -169,10 +171,10 @@ array_expression& array_expression::reshape(int_t size1, int_t size2)
} }
array_expression array_expression::operator-() array_expression array_expression::operator-()
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), dtype_, shape_); } { return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
array_expression array_expression::operator!() array_expression array_expression::operator!()
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), INT_TYPE, shape_); } { return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), context_, INT_TYPE, shape_); }
// //

View File

@@ -213,7 +213,7 @@ class ArgumentsHandler:
self.blas3_size = map(int, self.blas3_size) self.blas3_size = map(int, self.blas3_size)
if __name__ == "__main__": if __name__ == "__main__":
atd.state.queue_properties = atd.queue_properties_type.CL_QUEUE_PROFILING_ENABLE atd.state.queue_properties = atd.CL_QUEUE_PROFILING_ENABLE
platforms = atd.get_platforms() platforms = atd.get_platforms()
devices = [d for platform in platforms for d in platform.get_devices()] devices = [d for platform in platforms for d in platform.get_devices()]

View File

@@ -220,11 +220,9 @@ def benchmark(template, symbolic):
queue.models[template, atd.float32] = atd.model(template, queue) queue.models[template, atd.float32] = atd.model(template, queue)
x = atd.array(symbolic) x = atd.array(symbolic)
atd.synchronize(symbolic.context) atd.synchronize(symbolic.context)
current_time = 0 x, events, cache = atd.flush(symbolic)
timings = []
x, event, cache = atd.flush(symbolic)
atd.synchronize(symbolic.context) atd.synchronize(symbolic.context)
return 1e-9*(event.end - event.start) return 1e-9*sum([e.end - e.start for e in events])
def sanitize_string(string, keep_chars = ['_']): def sanitize_string(string, keep_chars = ['_']):

View File

@@ -317,13 +317,13 @@ namespace detail
bp::tuple flush(atd::array_expression const & expression, unsigned int queue_id, bp::list dependencies, int label, std::string const & program_name, bool force_recompile) bp::tuple flush(atd::array_expression const & expression, unsigned int queue_id, bp::list dependencies, int label, std::string const & program_name, bool force_recompile)
{ {
cl::Event event; std::list<cl::Event> events;
atd::operation_cache cache; atd::operation_cache cache;
std::vector<cl::Event> cdependencies = to_vector<cl::Event>(dependencies); std::vector<cl::Event> cdependencies = to_vector<cl::Event>(dependencies);
boost::shared_ptr<atd::array> parray(new atd::array(atd::control(expression, atd::execution_options_type(queue_id, &event, &cache, &cdependencies), boost::shared_ptr<atd::array> parray(new atd::array(atd::control(expression, atd::execution_options_type(queue_id, &events, &cache, &cdependencies),
atd::dispatcher_options_type(label), atd::compilation_options_type(program_name, force_recompile)))); atd::dispatcher_options_type(label), atd::compilation_options_type(program_name, force_recompile))));
return bp::make_tuple(*parray, event, cache); return bp::make_tuple(*parray, to_list(events.begin(), events.end()), cache);
} }
} }
@@ -404,16 +404,14 @@ void export_cl()
bp::def("flush", &detail::flush, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false)); bp::def("flush", &detail::flush, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
bp::enum_<cl_command_queue_properties>("queue_properties_type")
.value("CL_QUEUE_PROFILING_ENABLE", CL_QUEUE_PROFILING_ENABLE)
.value("CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE", CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
;
bp::class_<state_type>("state_type") bp::class_<state_type>("state_type")
.def_readwrite("queue_properties",&atd::cl_ext::queue_properties) .def_readwrite("queue_properties",&atd::cl_ext::queue_properties)
; ;
bp::scope().attr("state") = bp::object(bp::ptr(&state)); bp::scope().attr("state") = bp::object(bp::ptr(&state));
bp::scope().attr("CL_QUEUE_PROFILING_ENABLE") = CL_QUEUE_PROFILING_ENABLE;
bp::scope().attr("CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE") = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
} }
namespace detail namespace detail