Now using a list of events instead of a single one

This commit is contained in:
Philippe Tillet
2015-02-08 00:56:24 -05:00
parent b768e913c9
commit 9c68704f09
7 changed files with 156 additions and 137 deletions

View File

@@ -22,8 +22,25 @@ template<class T>
void bench(ad::numeric_type dtype)
{
unsigned int dtsize = ad::size_of(dtype);
cl::CommandQueue & queue = ad::cl_ext::queues[ad::cl_ext::default_context()][0];
#define BENCHMARK_OPENCL(OP, PERF) \
#define BENCHMARK_ATIDLAS(OP, PERF) \
{\
/* Repeats OP until at least 0.1 s of profiled time (nanoseconds, from the OpenCL events) has accumulated, */\
/* then prints PERF, which may refer to `t` = median(times), the per-run time in nanoseconds. */\
/* OP is expected to append its events to the local list `events`; `queue` must be in scope at the call site. */\
std::vector<long> times;\
double total_time = 0;\
while(total_time*1e-9 < 1e-1){\
std::list<cl::Event> events;\
OP;\
queue.finish();\
/* Seed with cl_ulong(0): a plain `0` makes std::accumulate sum in int, truncating totals above ~2.1 s of ns. */\
times.push_back(std::accumulate(events.begin(), events.end(), cl_ulong(0), \
[](cl_ulong sum, cl::Event const & e){ return sum + e.getProfilingInfo<CL_PROFILING_COMMAND_END>() - e.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>();}));\
total_time+=times.back();\
}\
double t = median(times);\
std::cout << " " << PERF << std::flush;\
}
#define BENCHMARK_CLAMDBLAS(OP, PERF) \
{\
std::vector<long> times;\
double total_time = 0;\
@@ -69,41 +86,40 @@ void bench(ad::numeric_type dtype)
std::cout << " " << PERF << std::flush;\
}
// /*---------*/
// /*--BLAS1--*/
// /*---------*/
// std::cout << "#AXPY" << std::endl;
// for(int_t N : create_log_range(1e3, 2e7, 50, 64))
// {
// std::cout << N;
// ad::array x(N, dtype), y(N, dtype);
// cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0];
// /* ATIDLAS */
// y = x + y; queue.flush(); queue.finish();
// BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(x + y, ad::execution_options_type(0, &event)), 3*N*dtsize/t)
// /* clAmdBlas */
//#ifdef BENCH_CLAMDBLAS
// BENCHMARK_OPENCL(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, NULL, &event()), 3*N*dtsize/t)
//#endif
// /* BLAS */
//#ifdef BENCH_CBLAS
// std::vector<float> cx(N), cy(N);
// ad::copy(x, cx);
// ad::copy(y, cy);
// BENCHMARK_HOST(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
//#endif
// /* CuBLAS */
//#ifdef BENCH_CUBLAS
// T *cux, *cuy;
// cudaMalloc((void**) &cux, N * sizeof(T));
// cudaMalloc((void**) &cuy, N * sizeof(T));
// BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
// cudaFree(cux);
// cudaFree(cuy);
//#endif
// std::cout << std::endl;
// }
// std::cout << "\n\n" << std::flush;
/*---------*/
/*--BLAS1--*/
/*---------*/
std::cout << "#AXPY" << std::endl;
for(int_t N : create_log_range(1e3, 2e7, 50, 64))
{
std::cout << N;
ad::array x(N, dtype), y(N, dtype);
/* ATIDLAS */
y = x + y; queue.flush(); queue.finish();
BENCHMARK_ATIDLAS(y = ad::controller<atidlas::array_expression>(x + y, ad::execution_options_type(0, &events)), 3*N*dtsize/t)
/* clAmdBlas */
#ifdef BENCH_CLAMDBLAS
BENCHMARK_CLAMDBLAS(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, NULL, &event()), 3*N*dtsize/t)
#endif
/* BLAS */
#ifdef BENCH_CBLAS
std::vector<float> cx(N), cy(N);
ad::copy(x, cx);
ad::copy(y, cy);
BENCHMARK_HOST(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
#endif
/* CuBLAS */
#ifdef BENCH_CUBLAS
T *cux, *cuy;
cudaMalloc((void**) &cux, N * sizeof(T));
cudaMalloc((void**) &cuy, N * sizeof(T));
BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
cudaFree(cux);
cudaFree(cuy);
#endif
std::cout << std::endl;
}
std::cout << "\n\n" << std::flush;
// std::cout << "#DOT" << std::endl;
// for(int_t N : create_log_range(1e3, 2e7, 50, 64))
@@ -111,7 +127,6 @@ void bench(ad::numeric_type dtype)
// std::cout << N;
// /* ATIDLAS */
// ad::array x(N, dtype), y(N, dtype);
// cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0];
// ad::array scratch(N, dtype);
// ad::scalar s(dtype);
// s = dot(x,y); queue.flush(); queue.finish();
@@ -140,43 +155,42 @@ void bench(ad::numeric_type dtype)
// }
// std::cout << "\n\n" << std::flush;
/*---------*/
/*--BLAS2--*/
/*---------*/
//T-layout
std::cout << "#GEMV-T" << std::endl;
for(int_t N: std::vector<int>{64})
for(int_t M: create_full_range(128, 10000, 64))
{
std::cout << M << "," << N;
/* ATIDLAS */
ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0];
y = dot(trans(A),x); queue.flush(); queue.finish();
BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(dot(trans(A),x), ad::execution_options_type(0, &event)),(M*N + M + N)*dtsize/t);
#ifdef BENCH_CLAMDBLAS
BENCHMARK_OPENCL(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &queue(),0, NULL, &event()), (M*N + M + N)*dtsize/t)
#endif
#ifdef BENCH_CBLAS
std::vector<float> cA(N*M), cx(N), cy(M);
ad::copy(x, cx);
ad::copy(y, cy);
ad::copy(A, cA);
BENCHMARK_HOST(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t);
#endif
#ifdef BENCH_CUBLAS
T *cuA, *cux, *cuy;
cudaMalloc((void**) &cuA, N * M * sizeof(T));
cudaMalloc((void**) &cux, N * sizeof(T));
cudaMalloc((void**) &cuy, M * sizeof(T));
BENCHMARK_CUDA(cublasSgemv(cublasTrans, N, M, 1, cuA, N, cux, 1, 0, cuy, 1), (M*N + M + N)*dtsize/t)
cudaFree(cuA);
cudaFree(cux);
cudaFree(cuy);
#endif
std::cout << std::endl;
}
std::cout << "\n\n" << std::flush;
// /*---------*/
// /*--BLAS2--*/
// /*---------*/
// //T-layout
// std::cout << "#GEMV-T" << std::endl;
// for(int_t N: std::vector<int>{64})
// for(int_t M: create_full_range(128, 10000, 64))
// {
// std::cout << M << "," << N;
// /* ATIDLAS */
// ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
// y = dot(trans(A),x); queue.flush(); queue.finish();
// BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(dot(trans(A),x), ad::execution_options_type(0, &event)),(M*N + M + N)*dtsize/t);
// #ifdef BENCH_CLAMDBLAS
// BENCHMARK_OPENCL(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &queue(),0, NULL, &event()), (M*N + M + N)*dtsize/t)
// #endif
// #ifdef BENCH_CBLAS
// std::vector<float> cA(N*M), cx(N), cy(M);
// ad::copy(x, cx);
// ad::copy(y, cy);
// ad::copy(A, cA);
// BENCHMARK_HOST(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t);
// #endif
// #ifdef BENCH_CUBLAS
// T *cuA, *cux, *cuy;
// cudaMalloc((void**) &cuA, N * M * sizeof(T));
// cudaMalloc((void**) &cux, N * sizeof(T));
// cudaMalloc((void**) &cuy, M * sizeof(T));
// BENCHMARK_CUDA(cublasSgemv(cublasTrans, N, M, 1, cuA, N, cux, 1, 0, cuy, 1), (M*N + M + N)*dtsize/t)
// cudaFree(cuA);
// cudaFree(cux);
// cudaFree(cuy);
// #endif
// std::cout << std::endl;
// }
// std::cout << "\n\n" << std::flush;
//// /*---------*/
//// /*--BLAS3--*/

View File

@@ -182,12 +182,12 @@ public:
public:
template<class LT, class RT>
array_expression(LT const & lhs, RT const & rhs, op_element const & op, cl::Context const & ctx, numeric_type const & dtype, size4 const & shape);
array_expression(LT const & lhs, RT const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
template<class RT>
array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape);
array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
template<class LT>
array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape);
array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape);
array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
size4 shape() const;
array_expression& reshape(int_t size1, int_t size2=1);
@@ -218,17 +218,18 @@ class operation_cache
cl::NDRange global;
cl::NDRange local;
std::vector<cl::Event>* dependencies;
cl::Event* event;
};
public:
void push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies, cl::Event* event)
{ l_.push_back({queue, kernel, offset, global, local, dependencies, event}); }
void push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies)
{ l_.push_back({queue, kernel, offset, global, local, dependencies}); }
void enqueue()
void enqueue(std::list<cl::Event>* events = NULL)
{
for(infos & i : l_)
i.queue.enqueueNDRangeKernel(i.kernel, i.offset, i.global, i.local, i.dependencies, i.event);
for(infos & i : l_){
  /* `events` is optional (it defaults to NULL): only record an event when the caller supplied a list. */
  /* std::list keeps element addresses stable, so &events->back() stays valid as later events are appended. */
  cl::Event* event = NULL;
  if(events){
    events->push_back(cl::Event());
    event = &events->back();
  }
  i.queue.enqueueNDRangeKernel(i.kernel, i.offset, i.global, i.local, i.dependencies, event);
}
}
private:
@@ -237,17 +238,22 @@ private:
// Per-call execution settings: the index of the queue to use, an optional sink for the
// events produced by the enqueued kernels, an optional operation_cache that records the
// launches, and an optional list of events the launches must wait on.
// All pointer members may be NULL, which disables the corresponding feature.
struct execution_options_type
{
execution_options_type(unsigned int _queue_id = 0, cl::Event* _event = NULL, operation_cache* _cache = NULL, std::vector<cl::Event>* _dependencies = NULL) : queue_id(_queue_id), event(_event), cache(_cache), dependencies(_dependencies){}
execution_options_type(unsigned int _queue_id = 0, std::list<cl::Event>* _events = NULL, operation_cache* _cache = NULL, std::vector<cl::Event>* _dependencies = NULL) : queue_id(_queue_id), events(_events), cache(_cache), dependencies(_dependencies){}
// Enqueues `kernel` on `queue` with the given ranges and, when a cache is attached,
// also records the launch in that cache.
void enqueue_cache(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange offset, cl::NDRange global, cl::NDRange local) const
{
// Events are optional: a new event is appended (and its address handed to OpenCL)
// only when the caller supplied a list; otherwise NULL is passed.
cl::Event* event = NULL;
if(events){
events->push_back(cl::Event());
event = &events->back();
}
queue.enqueueNDRangeKernel(kernel, offset, global, local, dependencies, event);
// NOTE(review): the cached launch is recorded with cl::NullRange instead of `offset`,
// and with the raw `dependencies` pointer — confirm both are intended for later replays.
if(cache)
cache->push_back(queue, kernel, cl::NullRange, global, local, dependencies, event);
cache->push_back(queue, kernel, cl::NullRange, global, local, dependencies);
}
unsigned int queue_id;
cl::Event* event;
std::list<cl::Event>* events;
operation_cache* cache;
std::vector<cl::Event>* dependencies;
};

View File

@@ -139,7 +139,8 @@ template<class TYPE>
array& array::operator=(controller<TYPE> const & c)
{
assert(dtype_ == c.x().dtype());
execute(controller<array_expression>(detail::assign(*this, c.x()), c.execution_options(), c.dispatcher_options(), c.compilation_options()),
array_expression expression(*this, c.x(), op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), context_, dtype_, shape_);
execute(controller<array_expression>(expression, c.execution_options(), c.dispatcher_options(), c.compilation_options()),
atidlas::get_model_map(cl_ext::queues[context_][c.execution_options().queue_id]));
return *this;
}
@@ -180,7 +181,7 @@ array & array::operator+=(array const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype_, shape_); }
array & array::operator+=(array_expression const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), dtype_, shape_); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), rhs.context(), dtype_, shape_); }
//----
array & array::operator-=(value_scalar const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
@@ -189,7 +190,7 @@ array & array::operator-=(array const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
array & array::operator-=(array_expression const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), dtype_, shape_); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), rhs.context(), dtype_, shape_); }
//----
array & array::operator*=(value_scalar const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); }
@@ -198,7 +199,7 @@ array & array::operator*=(array const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); }
array & array::operator*=(array_expression const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), dtype_, shape_); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), rhs.context(), dtype_, shape_); }
//----
array & array::operator/=(value_scalar const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); }
@@ -207,7 +208,7 @@ array & array::operator/=(array const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); }
array & array::operator/=(array_expression const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), dtype_, shape_); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), rhs.context(), dtype_, shape_); }
array_expression array::T() const
{ return atidlas::trans(*this) ;}
@@ -394,28 +395,28 @@ bool check_elementwise(U const & u, V const & v)
#define DEFINE_ELEMENT_BINARY_OPERATOR(OP, OPNAME, DTYPE) \
array_expression OPNAME (array_expression const & x, array_expression const & y) \
{ assert(check_elementwise(x, y));\
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, elementwise_size(x, y)); } \
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); } \
\
array_expression OPNAME (array const & x, array_expression const & y) \
{ assert(check_elementwise(x, y));\
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, elementwise_size(x, y)); } \
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); } \
\
array_expression OPNAME (array_expression const & x, array const & y) \
{ assert(check_elementwise(x, y));\
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, elementwise_size(x, y)); } \
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); } \
\
array_expression OPNAME (array const & x, array const & y) \
{ assert(check_elementwise(x, y));\
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); }\
\
array_expression OPNAME (array_expression const & x, value_scalar const & y) \
{ return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, x.shape()); } \
{ return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); } \
\
array_expression OPNAME (array const & x, value_scalar const & y) \
{ return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); }\
\
array_expression OPNAME (value_scalar const & y, array_expression const & x) \
{ return array_expression(y, x, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, x.shape()); } \
{ return array_expression(y, x, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); } \
\
array_expression OPNAME (value_scalar const & y, array const & x) \
{ return array_expression(y, x, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); }
@@ -458,7 +459,7 @@ array_expression OPNAME (array const & x) \
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.context(), x.dtype(), x.shape()); }\
\
array_expression OPNAME (array_expression const & x) \
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.dtype(), x.shape()); }
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.context(), x.dtype(), x.shape()); }
DEFINE_ELEMENT_UNARY_OPERATOR((x.dtype()==FLOAT_TYPE || x.dtype()==DOUBLE_TYPE)?OPERATOR_FABS_TYPE:OPERATOR_ABS_TYPE, abs)
DEFINE_ELEMENT_UNARY_OPERATOR(OPERATOR_ACOS_TYPE, acos)
@@ -506,7 +507,7 @@ array_expression cast(array const & x, numeric_type dtype)
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, casted(dtype)), x.context(), dtype, x.shape()); }
array_expression cast(array_expression const & x, numeric_type dtype)
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, casted(dtype)), dtype, x.shape()); }
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, casted(dtype)), x.context(), dtype, x.shape()); }
atidlas::array_expression eye(std::size_t M, std::size_t N, atidlas::numeric_type dtype, cl::Context ctx)
{ return array_expression(value_scalar(1), value_scalar(0), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_VDIAG_TYPE), ctx, dtype, size4(M, N)); }
@@ -524,7 +525,7 @@ array_expression trans(array const & x) \
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), flip(x.shape())); }\
\
array_expression trans(array_expression const & x) \
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.dtype(), flip(x.shape())); }
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), flip(x.shape())); }
array_expression repmat(array const & A, int_t const & rep1, int_t const & rep2)
{
@@ -543,7 +544,7 @@ array_expression repmat(array_expression const & A, int_t const & rep1, int_t co
infos.rep2 = rep2;
infos.sub1 = A.shape()._1;
infos.sub2 = A.shape()._2;
return array_expression(A, infos, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_REPEAT_TYPE), A.dtype(), size4(infos.rep1*infos.sub1, infos.rep2*infos.sub2));
return array_expression(A, infos, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_REPEAT_TYPE), A.context(), A.dtype(), size4(infos.rep1*infos.sub1, infos.rep2*infos.sub2));
}
////---------------------------------------
@@ -568,11 +569,11 @@ array_expression OPNAME(array_expression const & x, int_t axis)\
if(axis < -1 || axis > x.nshape())\
throw std::out_of_range("The axis entry is out of bounds");\
if(axis==-1)\
return array_expression(x, invalid_node(), op_element(OPERATOR_VECTOR_REDUCTION_TYPE_FAMILY, OP), x.dtype(), size4(1));\
return array_expression(x, invalid_node(), op_element(OPERATOR_VECTOR_REDUCTION_TYPE_FAMILY, OP), x.context(), x.dtype(), size4(1));\
else if(axis==0)\
return array_expression(x, invalid_node(), op_element(OPERATOR_ROWS_REDUCTION_TYPE_FAMILY, OP), x.dtype(), size4(x.shape()._1));\
return array_expression(x, invalid_node(), op_element(OPERATOR_ROWS_REDUCTION_TYPE_FAMILY, OP), x.context(), x.dtype(), size4(x.shape()._1));\
else\
return array_expression(x, invalid_node(), op_element(OPERATOR_COLUMNS_REDUCTION_TYPE_FAMILY, OP), x.dtype(), size4(x.shape()._2));\
return array_expression(x, invalid_node(), op_element(OPERATOR_COLUMNS_REDUCTION_TYPE_FAMILY, OP), x.context(), x.dtype(), size4(x.shape()._2));\
}
DEFINE_REDUCTION(OPERATOR_ADD_TYPE, sum)
@@ -604,7 +605,7 @@ namespace detail
shape._1 = A.shape()._2;
}
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.dtype(), shape);
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.context(), A.dtype(), shape);
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
if(A_trans) res_root.lhs = A_root.lhs;
return res;
@@ -621,7 +622,7 @@ namespace detail
type = OPERATOR_MATRIX_PRODUCT_NT_TYPE;
shape._2 = B.shape()._1;
}
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.dtype(), shape);
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.context(), A.dtype(), shape);
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
if(B_trans) res_root.rhs = B_root.lhs;
return res;
@@ -643,7 +644,7 @@ namespace detail
else if(!A_trans && B_trans) type = OPERATOR_MATRIX_PRODUCT_NT_TYPE;
else type = OPERATOR_MATRIX_PRODUCT_NN_TYPE;
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.dtype(), shape);
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.context(), A.dtype(), shape);
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
if(A_trans) res_root.lhs = A_root.lhs;
if(B_trans) res_root.rhs = B_root.lhs;
@@ -667,7 +668,7 @@ namespace detail
bool A_trans = A_root.op.type==OPERATOR_TRANS_TYPE;
if(A_trans)
{
array_expression tmp(A, repmat(x, 1, M), op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ELEMENT_PROD_TYPE), A.dtype(), size4(N, M));
array_expression tmp(A, repmat(x, 1, M), op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ELEMENT_PROD_TYPE), A.context(), A.dtype(), size4(N, M));
//Remove trans
tmp.tree()[tmp.root()].lhs = A.tree()[A.root()].lhs;
return sum(tmp, 1);

View File

@@ -75,8 +75,8 @@ array_expression::array_expression(LT const & lhs, RT const & rhs, op_element co
}
template<class RT>
array_expression::array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape) :
tree_(lhs.tree_.size() + 1), root_(tree_.size()-1), context_(lhs.context_), dtype_(dtype), shape_(shape)
array_expression::array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape) :
tree_(lhs.tree_.size() + 1), root_(tree_.size()-1), context_(context), dtype_(dtype), shape_(shape)
{
std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin());
fill(tree_[root_].lhs, lhs.root_);
@@ -85,8 +85,8 @@ array_expression::array_expression(array_expression const & lhs, RT const & rhs,
}
template<class LT>
array_expression::array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape) :
tree_(rhs.tree_.size() + 1), root_(tree_.size() - 1), context_(rhs.context_), dtype_(dtype), shape_(shape)
array_expression::array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape) :
tree_(rhs.tree_.size() + 1), root_(tree_.size() - 1), context_(context), dtype_(dtype), shape_(shape)
{
std::copy(rhs.tree_.begin(), rhs.tree_.end(), tree_.begin());
fill(tree_[root_].lhs, lhs);
@@ -94,8 +94,8 @@ array_expression::array_expression(LT const & lhs, array_expression const & rhs,
fill(tree_[root_].rhs, rhs.root_);
}
array_expression::array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape):
tree_(lhs.tree_.size() + rhs.tree_.size() + 1), root_(tree_.size()-1), context_(lhs.context_), dtype_(dtype), shape_(shape)
array_expression::array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape):
tree_(lhs.tree_.size() + rhs.tree_.size() + 1), root_(tree_.size()-1), context_(context), dtype_(dtype), shape_(shape)
{
std::size_t lsize = lhs.tree_.size();
std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin());
@@ -110,15 +110,15 @@ array_expression::array_expression(array_expression const & lhs, array_expressio
root_ = tree_.size() - 1;
}
template array_expression::array_expression(array_expression const &, value_scalar const &, op_element const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array_expression const &, invalid_node const &, op_element const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array_expression const &, array const &, op_element const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array_expression const &, repeat_infos const &, op_element const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array_expression const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array_expression const &, invalid_node const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array_expression const &, array const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array_expression const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(value_scalar const &, array_expression const &, op_element const &, numeric_type const &, size4 const &);
template array_expression::array_expression(invalid_node const &, array_expression const &, op_element const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array const &, array_expression const &, op_element const &, numeric_type const &, size4 const &);
template array_expression::array_expression(repeat_infos const &, array_expression const &, op_element const &, numeric_type const &, size4 const &);
template array_expression::array_expression(value_scalar const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(invalid_node const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(array const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(repeat_infos const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(value_scalar const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(invalid_node const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
@@ -140,6 +140,8 @@ template array_expression::array_expression(invalid_node const &, repeat_infos c
template array_expression::array_expression(array const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
template array_expression::array_expression(repeat_infos const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
array_expression::container_type & array_expression::tree()
{ return tree_; }
@@ -169,10 +171,10 @@ array_expression& array_expression::reshape(int_t size1, int_t size2)
}
array_expression array_expression::operator-()
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), dtype_, shape_); }
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
array_expression array_expression::operator!()
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), INT_TYPE, shape_); }
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), context_, INT_TYPE, shape_); }
//

View File

@@ -213,7 +213,7 @@ class ArgumentsHandler:
self.blas3_size = map(int, self.blas3_size)
if __name__ == "__main__":
atd.state.queue_properties = atd.queue_properties_type.CL_QUEUE_PROFILING_ENABLE
atd.state.queue_properties = atd.CL_QUEUE_PROFILING_ENABLE
platforms = atd.get_platforms()
devices = [d for platform in platforms for d in platform.get_devices()]

View File

@@ -220,11 +220,9 @@ def benchmark(template, symbolic):
queue.models[template, atd.float32] = atd.model(template, queue)
x = atd.array(symbolic)
atd.synchronize(symbolic.context)
current_time = 0
timings = []
x, event, cache = atd.flush(symbolic)
x, events, cache = atd.flush(symbolic)
atd.synchronize(symbolic.context)
return 1e-9*(event.end - event.start)
return 1e-9*sum([e.end - e.start for e in events])
def sanitize_string(string, keep_chars = ['_']):

View File

@@ -317,13 +317,13 @@ namespace detail
// Builds an array from `expression` using the requested queue, dependencies and
// compilation/dispatch options, and returns a Python tuple holding the array, the
// events collected in `events` (converted to a Python list) and the operation cache.
// NOTE(review): the execution options are given &cdependencies, a function-local vector.
// If the cache ends up storing that pointer (execution_options_type::enqueue_cache forwards
// `dependencies` to operation_cache::push_back), replaying the returned cache after this
// function returns would read a destroyed vector — confirm, and consider storing the
// dependencies by value.
bp::tuple flush(atd::array_expression const & expression, unsigned int queue_id, bp::list dependencies, int label, std::string const & program_name, bool force_recompile)
{
cl::Event event;
std::list<cl::Event> events;
atd::operation_cache cache;
std::vector<cl::Event> cdependencies = to_vector<cl::Event>(dependencies);
boost::shared_ptr<atd::array> parray(new atd::array(atd::control(expression, atd::execution_options_type(queue_id, &event, &cache, &cdependencies),
boost::shared_ptr<atd::array> parray(new atd::array(atd::control(expression, atd::execution_options_type(queue_id, &events, &cache, &cdependencies),
atd::dispatcher_options_type(label), atd::compilation_options_type(program_name, force_recompile))));
return bp::make_tuple(*parray, event, cache);
return bp::make_tuple(*parray, to_list(events.begin(), events.end()), cache);
}
}
@@ -404,16 +404,14 @@ void export_cl()
bp::def("flush", &detail::flush, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
bp::enum_<cl_command_queue_properties>("queue_properties_type")
.value("CL_QUEUE_PROFILING_ENABLE", CL_QUEUE_PROFILING_ENABLE)
.value("CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE", CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
;
bp::class_<state_type>("state_type")
.def_readwrite("queue_properties",&atd::cl_ext::queue_properties)
;
bp::scope().attr("state") = bp::object(bp::ptr(&state));
bp::scope().attr("CL_QUEUE_PROFILING_ENABLE") = CL_QUEUE_PROFILING_ENABLE;
bp::scope().attr("CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE") = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
}
namespace detail