Now using a list of events instead of a single one
This commit is contained in:
162
bench/blas.cpp
162
bench/blas.cpp
@@ -22,8 +22,25 @@ template<class T>
|
||||
void bench(ad::numeric_type dtype)
|
||||
{
|
||||
unsigned int dtsize = ad::size_of(dtype);
|
||||
cl::CommandQueue & queue = ad::cl_ext::queues[ad::cl_ext::default_context()][0];
|
||||
|
||||
#define BENCHMARK_OPENCL(OP, PERF) \
|
||||
#define BENCHMARK_ATIDLAS(OP, PERF) \
|
||||
{\
|
||||
std::vector<long> times;\
|
||||
double total_time = 0;\
|
||||
while(total_time*1e-9 < 1e-1){\
|
||||
std::list<cl::Event> events;\
|
||||
OP;\
|
||||
queue.finish();\
|
||||
times.push_back(std::accumulate(events.begin(), events.end(), 0, \
|
||||
[](unsigned long sum, cl::Event const & e){ return sum + e.getProfilingInfo<CL_PROFILING_COMMAND_END>() - e.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>();}));\
|
||||
total_time+=times.back();\
|
||||
}\
|
||||
double t = median(times);\
|
||||
std::cout << " " << PERF << std::flush;\
|
||||
}
|
||||
|
||||
#define BENCHMARK_CLAMDBLAS(OP, PERF) \
|
||||
{\
|
||||
std::vector<long> times;\
|
||||
double total_time = 0;\
|
||||
@@ -69,41 +86,40 @@ void bench(ad::numeric_type dtype)
|
||||
std::cout << " " << PERF << std::flush;\
|
||||
}
|
||||
|
||||
// /*---------*/
|
||||
// /*--BLAS1--*/
|
||||
// /*---------*/
|
||||
// std::cout << "#AXPY" << std::endl;
|
||||
// for(int_t N : create_log_range(1e3, 2e7, 50, 64))
|
||||
// {
|
||||
// std::cout << N;
|
||||
// ad::array x(N, dtype), y(N, dtype);
|
||||
// cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0];
|
||||
// /* ATIDLAS */
|
||||
// y = x + y; queue.flush(); queue.finish();
|
||||
// BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(x + y, ad::execution_options_type(0, &event)), 3*N*dtsize/t)
|
||||
// /* clAmdBlas */
|
||||
//#ifdef BENCH_CLAMDBLAS
|
||||
// BENCHMARK_OPENCL(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, NULL, &event()), 3*N*dtsize/t)
|
||||
//#endif
|
||||
// /* BLAS */
|
||||
//#ifdef BENCH_CBLAS
|
||||
// std::vector<float> cx(N), cy(N);
|
||||
// ad::copy(x, cx);
|
||||
// ad::copy(y, cy);
|
||||
// BENCHMARK_HOST(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
|
||||
//#endif
|
||||
// /* CuBLAS */
|
||||
//#ifdef BENCH_CUBLAS
|
||||
// T *cux, *cuy;
|
||||
// cudaMalloc((void**) &cux, N * sizeof(T));
|
||||
// cudaMalloc((void**) &cuy, N * sizeof(T));
|
||||
// BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
|
||||
// cudaFree(cux);
|
||||
// cudaFree(cuy);
|
||||
//#endif
|
||||
// std::cout << std::endl;
|
||||
// }
|
||||
// std::cout << "\n\n" << std::flush;
|
||||
/*---------*/
|
||||
/*--BLAS1--*/
|
||||
/*---------*/
|
||||
std::cout << "#AXPY" << std::endl;
|
||||
for(int_t N : create_log_range(1e3, 2e7, 50, 64))
|
||||
{
|
||||
std::cout << N;
|
||||
ad::array x(N, dtype), y(N, dtype);
|
||||
/* ATIDLAS */
|
||||
y = x + y; queue.flush(); queue.finish();
|
||||
BENCHMARK_ATIDLAS(y = ad::controller<atidlas::array_expression>(x + y, ad::execution_options_type(0, &events)), 3*N*dtsize/t)
|
||||
/* clAmdBlas */
|
||||
#ifdef BENCH_CLAMDBLAS
|
||||
BENCHMARK_CLAMDBLAS(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, NULL, &event()), 3*N*dtsize/t)
|
||||
#endif
|
||||
/* BLAS */
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cx(N), cy(N);
|
||||
ad::copy(x, cx);
|
||||
ad::copy(y, cy);
|
||||
BENCHMARK_HOST(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
|
||||
#endif
|
||||
/* CuBLAS */
|
||||
#ifdef BENCH_CUBLAS
|
||||
T *cux, *cuy;
|
||||
cudaMalloc((void**) &cux, N * sizeof(T));
|
||||
cudaMalloc((void**) &cuy, N * sizeof(T));
|
||||
BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
|
||||
cudaFree(cux);
|
||||
cudaFree(cuy);
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::cout << "\n\n" << std::flush;
|
||||
|
||||
// std::cout << "#DOT" << std::endl;
|
||||
// for(int_t N : create_log_range(1e3, 2e7, 50, 64))
|
||||
@@ -111,7 +127,6 @@ void bench(ad::numeric_type dtype)
|
||||
// std::cout << N;
|
||||
// /* ATIDLAS */
|
||||
// ad::array x(N, dtype), y(N, dtype);
|
||||
// cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0];
|
||||
// ad::array scratch(N, dtype);
|
||||
// ad::scalar s(dtype);
|
||||
// s = dot(x,y); queue.flush(); queue.finish();
|
||||
@@ -140,43 +155,42 @@ void bench(ad::numeric_type dtype)
|
||||
// }
|
||||
// std::cout << "\n\n" << std::flush;
|
||||
|
||||
/*---------*/
|
||||
/*--BLAS2--*/
|
||||
/*---------*/
|
||||
//T-layout
|
||||
std::cout << "#GEMV-T" << std::endl;
|
||||
for(int_t N: std::vector<int>{64})
|
||||
for(int_t M: create_full_range(128, 10000, 64))
|
||||
{
|
||||
std::cout << M << "," << N;
|
||||
/* ATIDLAS */
|
||||
ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
|
||||
cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0];
|
||||
y = dot(trans(A),x); queue.flush(); queue.finish();
|
||||
BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(dot(trans(A),x), ad::execution_options_type(0, &event)),(M*N + M + N)*dtsize/t);
|
||||
#ifdef BENCH_CLAMDBLAS
|
||||
BENCHMARK_OPENCL(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &queue(),0, NULL, &event()), (M*N + M + N)*dtsize/t)
|
||||
#endif
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cA(N*M), cx(N), cy(M);
|
||||
ad::copy(x, cx);
|
||||
ad::copy(y, cy);
|
||||
ad::copy(A, cA);
|
||||
BENCHMARK_HOST(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t);
|
||||
#endif
|
||||
#ifdef BENCH_CUBLAS
|
||||
T *cuA, *cux, *cuy;
|
||||
cudaMalloc((void**) &cuA, N * M * sizeof(T));
|
||||
cudaMalloc((void**) &cux, N * sizeof(T));
|
||||
cudaMalloc((void**) &cuy, M * sizeof(T));
|
||||
BENCHMARK_CUDA(cublasSgemv(cublasTrans, N, M, 1, cuA, N, cux, 1, 0, cuy, 1), (M*N + M + N)*dtsize/t)
|
||||
cudaFree(cuA);
|
||||
cudaFree(cux);
|
||||
cudaFree(cuy);
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::cout << "\n\n" << std::flush;
|
||||
// /*---------*/
|
||||
// /*--BLAS2--*/
|
||||
// /*---------*/
|
||||
// //T-layout
|
||||
// std::cout << "#GEMV-T" << std::endl;
|
||||
// for(int_t N: std::vector<int>{64})
|
||||
// for(int_t M: create_full_range(128, 10000, 64))
|
||||
// {
|
||||
// std::cout << M << "," << N;
|
||||
// /* ATIDLAS */
|
||||
// ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
|
||||
// y = dot(trans(A),x); queue.flush(); queue.finish();
|
||||
// BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(dot(trans(A),x), ad::execution_options_type(0, &event)),(M*N + M + N)*dtsize/t);
|
||||
// #ifdef BENCH_CLAMDBLAS
|
||||
// BENCHMARK_OPENCL(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &queue(),0, NULL, &event()), (M*N + M + N)*dtsize/t)
|
||||
// #endif
|
||||
// #ifdef BENCH_CBLAS
|
||||
// std::vector<float> cA(N*M), cx(N), cy(M);
|
||||
// ad::copy(x, cx);
|
||||
// ad::copy(y, cy);
|
||||
// ad::copy(A, cA);
|
||||
// BENCHMARK_HOST(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t);
|
||||
// #endif
|
||||
// #ifdef BENCH_CUBLAS
|
||||
// T *cuA, *cux, *cuy;
|
||||
// cudaMalloc((void**) &cuA, N * M * sizeof(T));
|
||||
// cudaMalloc((void**) &cux, N * sizeof(T));
|
||||
// cudaMalloc((void**) &cuy, M * sizeof(T));
|
||||
// BENCHMARK_CUDA(cublasSgemv(cublasTrans, N, M, 1, cuA, N, cux, 1, 0, cuy, 1), (M*N + M + N)*dtsize/t)
|
||||
// cudaFree(cuA);
|
||||
// cudaFree(cux);
|
||||
// cudaFree(cuy);
|
||||
// #endif
|
||||
// std::cout << std::endl;
|
||||
// }
|
||||
// std::cout << "\n\n" << std::flush;
|
||||
|
||||
//// /*---------*/
|
||||
//// /*--BLAS3--*/
|
||||
|
@@ -182,12 +182,12 @@ public:
|
||||
|
||||
public:
|
||||
template<class LT, class RT>
|
||||
array_expression(LT const & lhs, RT const & rhs, op_element const & op, cl::Context const & ctx, numeric_type const & dtype, size4 const & shape);
|
||||
array_expression(LT const & lhs, RT const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
|
||||
template<class RT>
|
||||
array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape);
|
||||
array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
|
||||
template<class LT>
|
||||
array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape);
|
||||
array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape);
|
||||
array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
|
||||
array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
|
||||
|
||||
size4 shape() const;
|
||||
array_expression& reshape(int_t size1, int_t size2=1);
|
||||
@@ -218,17 +218,18 @@ class operation_cache
|
||||
cl::NDRange global;
|
||||
cl::NDRange local;
|
||||
std::vector<cl::Event>* dependencies;
|
||||
cl::Event* event;
|
||||
};
|
||||
|
||||
public:
|
||||
void push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies, cl::Event* event)
|
||||
{ l_.push_back({queue, kernel, offset, global, local, dependencies, event}); }
|
||||
void push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies)
|
||||
{ l_.push_back({queue, kernel, offset, global, local, dependencies}); }
|
||||
|
||||
void enqueue()
|
||||
void enqueue(std::list<cl::Event>* events = NULL)
|
||||
{
|
||||
for(infos & i : l_)
|
||||
i.queue.enqueueNDRangeKernel(i.kernel, i.offset, i.global, i.local, i.dependencies, i.event);
|
||||
for(infos & i : l_){
|
||||
events->push_back(cl::Event());
|
||||
i.queue.enqueueNDRangeKernel(i.kernel, i.offset, i.global, i.local, i.dependencies, &events->back());
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -237,17 +238,22 @@ private:
|
||||
|
||||
struct execution_options_type
|
||||
{
|
||||
execution_options_type(unsigned int _queue_id = 0, cl::Event* _event = NULL, operation_cache* _cache = NULL, std::vector<cl::Event>* _dependencies = NULL) : queue_id(_queue_id), event(_event), cache(_cache), dependencies(_dependencies){}
|
||||
execution_options_type(unsigned int _queue_id = 0, std::list<cl::Event>* _events = NULL, operation_cache* _cache = NULL, std::vector<cl::Event>* _dependencies = NULL) : queue_id(_queue_id), events(_events), cache(_cache), dependencies(_dependencies){}
|
||||
|
||||
void enqueue_cache(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange offset, cl::NDRange global, cl::NDRange local) const
|
||||
{
|
||||
cl::Event* event = NULL;
|
||||
if(events){
|
||||
events->push_back(cl::Event());
|
||||
event = &events->back();
|
||||
}
|
||||
queue.enqueueNDRangeKernel(kernel, offset, global, local, dependencies, event);
|
||||
if(cache)
|
||||
cache->push_back(queue, kernel, cl::NullRange, global, local, dependencies, event);
|
||||
cache->push_back(queue, kernel, cl::NullRange, global, local, dependencies);
|
||||
}
|
||||
|
||||
unsigned int queue_id;
|
||||
cl::Event* event;
|
||||
std::list<cl::Event>* events;
|
||||
operation_cache* cache;
|
||||
std::vector<cl::Event>* dependencies;
|
||||
};
|
||||
|
@@ -139,7 +139,8 @@ template<class TYPE>
|
||||
array& array::operator=(controller<TYPE> const & c)
|
||||
{
|
||||
assert(dtype_ == c.x().dtype());
|
||||
execute(controller<array_expression>(detail::assign(*this, c.x()), c.execution_options(), c.dispatcher_options(), c.compilation_options()),
|
||||
array_expression expression(*this, c.x(), op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), context_, dtype_, shape_);
|
||||
execute(controller<array_expression>(expression, c.execution_options(), c.dispatcher_options(), c.compilation_options()),
|
||||
atidlas::get_model_map(cl_ext::queues[context_][c.execution_options().queue_id]));
|
||||
return *this;
|
||||
}
|
||||
@@ -180,7 +181,7 @@ array & array::operator+=(array const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array & array::operator+=(array_expression const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), dtype_, shape_); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), rhs.context(), dtype_, shape_); }
|
||||
//----
|
||||
array & array::operator-=(value_scalar const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
|
||||
@@ -189,7 +190,7 @@ array & array::operator-=(array const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array & array::operator-=(array_expression const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), dtype_, shape_); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), rhs.context(), dtype_, shape_); }
|
||||
//----
|
||||
array & array::operator*=(value_scalar const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); }
|
||||
@@ -198,7 +199,7 @@ array & array::operator*=(array const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array & array::operator*=(array_expression const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), dtype_, shape_); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), rhs.context(), dtype_, shape_); }
|
||||
//----
|
||||
array & array::operator/=(value_scalar const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); }
|
||||
@@ -207,7 +208,7 @@ array & array::operator/=(array const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array & array::operator/=(array_expression const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), dtype_, shape_); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), rhs.context(), dtype_, shape_); }
|
||||
|
||||
array_expression array::T() const
|
||||
{ return atidlas::trans(*this) ;}
|
||||
@@ -394,28 +395,28 @@ bool check_elementwise(U const & u, V const & v)
|
||||
#define DEFINE_ELEMENT_BINARY_OPERATOR(OP, OPNAME, DTYPE) \
|
||||
array_expression OPNAME (array_expression const & x, array_expression const & y) \
|
||||
{ assert(check_elementwise(x, y));\
|
||||
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, elementwise_size(x, y)); } \
|
||||
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); } \
|
||||
\
|
||||
array_expression OPNAME (array const & x, array_expression const & y) \
|
||||
{ assert(check_elementwise(x, y));\
|
||||
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, elementwise_size(x, y)); } \
|
||||
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); } \
|
||||
\
|
||||
array_expression OPNAME (array_expression const & x, array const & y) \
|
||||
{ assert(check_elementwise(x, y));\
|
||||
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, elementwise_size(x, y)); } \
|
||||
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); } \
|
||||
\
|
||||
array_expression OPNAME (array const & x, array const & y) \
|
||||
{ assert(check_elementwise(x, y));\
|
||||
return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); }\
|
||||
\
|
||||
array_expression OPNAME (array_expression const & x, value_scalar const & y) \
|
||||
{ return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, x.shape()); } \
|
||||
{ return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); } \
|
||||
\
|
||||
array_expression OPNAME (array const & x, value_scalar const & y) \
|
||||
{ return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); }\
|
||||
\
|
||||
array_expression OPNAME (value_scalar const & y, array_expression const & x) \
|
||||
{ return array_expression(y, x, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), DTYPE, x.shape()); } \
|
||||
{ return array_expression(y, x, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); } \
|
||||
\
|
||||
array_expression OPNAME (value_scalar const & y, array const & x) \
|
||||
{ return array_expression(y, x, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); }
|
||||
@@ -458,7 +459,7 @@ array_expression OPNAME (array const & x) \
|
||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.context(), x.dtype(), x.shape()); }\
|
||||
\
|
||||
array_expression OPNAME (array_expression const & x) \
|
||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.dtype(), x.shape()); }
|
||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.context(), x.dtype(), x.shape()); }
|
||||
|
||||
DEFINE_ELEMENT_UNARY_OPERATOR((x.dtype()==FLOAT_TYPE || x.dtype()==DOUBLE_TYPE)?OPERATOR_FABS_TYPE:OPERATOR_ABS_TYPE, abs)
|
||||
DEFINE_ELEMENT_UNARY_OPERATOR(OPERATOR_ACOS_TYPE, acos)
|
||||
@@ -506,7 +507,7 @@ array_expression cast(array const & x, numeric_type dtype)
|
||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, casted(dtype)), x.context(), dtype, x.shape()); }
|
||||
|
||||
array_expression cast(array_expression const & x, numeric_type dtype)
|
||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, casted(dtype)), dtype, x.shape()); }
|
||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, casted(dtype)), x.context(), dtype, x.shape()); }
|
||||
|
||||
atidlas::array_expression eye(std::size_t M, std::size_t N, atidlas::numeric_type dtype, cl::Context ctx)
|
||||
{ return array_expression(value_scalar(1), value_scalar(0), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_VDIAG_TYPE), ctx, dtype, size4(M, N)); }
|
||||
@@ -524,7 +525,7 @@ array_expression trans(array const & x) \
|
||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), flip(x.shape())); }\
|
||||
\
|
||||
array_expression trans(array_expression const & x) \
|
||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.dtype(), flip(x.shape())); }
|
||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), flip(x.shape())); }
|
||||
|
||||
array_expression repmat(array const & A, int_t const & rep1, int_t const & rep2)
|
||||
{
|
||||
@@ -543,7 +544,7 @@ array_expression repmat(array_expression const & A, int_t const & rep1, int_t co
|
||||
infos.rep2 = rep2;
|
||||
infos.sub1 = A.shape()._1;
|
||||
infos.sub2 = A.shape()._2;
|
||||
return array_expression(A, infos, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_REPEAT_TYPE), A.dtype(), size4(infos.rep1*infos.sub1, infos.rep2*infos.sub2));
|
||||
return array_expression(A, infos, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_REPEAT_TYPE), A.context(), A.dtype(), size4(infos.rep1*infos.sub1, infos.rep2*infos.sub2));
|
||||
}
|
||||
|
||||
////---------------------------------------
|
||||
@@ -568,11 +569,11 @@ array_expression OPNAME(array_expression const & x, int_t axis)\
|
||||
if(axis < -1 || axis > x.nshape())\
|
||||
throw std::out_of_range("The axis entry is out of bounds");\
|
||||
if(axis==-1)\
|
||||
return array_expression(x, invalid_node(), op_element(OPERATOR_VECTOR_REDUCTION_TYPE_FAMILY, OP), x.dtype(), size4(1));\
|
||||
return array_expression(x, invalid_node(), op_element(OPERATOR_VECTOR_REDUCTION_TYPE_FAMILY, OP), x.context(), x.dtype(), size4(1));\
|
||||
else if(axis==0)\
|
||||
return array_expression(x, invalid_node(), op_element(OPERATOR_ROWS_REDUCTION_TYPE_FAMILY, OP), x.dtype(), size4(x.shape()._1));\
|
||||
return array_expression(x, invalid_node(), op_element(OPERATOR_ROWS_REDUCTION_TYPE_FAMILY, OP), x.context(), x.dtype(), size4(x.shape()._1));\
|
||||
else\
|
||||
return array_expression(x, invalid_node(), op_element(OPERATOR_COLUMNS_REDUCTION_TYPE_FAMILY, OP), x.dtype(), size4(x.shape()._2));\
|
||||
return array_expression(x, invalid_node(), op_element(OPERATOR_COLUMNS_REDUCTION_TYPE_FAMILY, OP), x.context(), x.dtype(), size4(x.shape()._2));\
|
||||
}
|
||||
|
||||
DEFINE_REDUCTION(OPERATOR_ADD_TYPE, sum)
|
||||
@@ -604,7 +605,7 @@ namespace detail
|
||||
shape._1 = A.shape()._2;
|
||||
}
|
||||
|
||||
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.dtype(), shape);
|
||||
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.context(), A.dtype(), shape);
|
||||
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
|
||||
if(A_trans) res_root.lhs = A_root.lhs;
|
||||
return res;
|
||||
@@ -621,7 +622,7 @@ namespace detail
|
||||
type = OPERATOR_MATRIX_PRODUCT_NT_TYPE;
|
||||
shape._2 = B.shape()._1;
|
||||
}
|
||||
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.dtype(), shape);
|
||||
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.context(), A.dtype(), shape);
|
||||
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
|
||||
if(B_trans) res_root.rhs = B_root.lhs;
|
||||
return res;
|
||||
@@ -643,7 +644,7 @@ namespace detail
|
||||
else if(!A_trans && B_trans) type = OPERATOR_MATRIX_PRODUCT_NT_TYPE;
|
||||
else type = OPERATOR_MATRIX_PRODUCT_NN_TYPE;
|
||||
|
||||
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.dtype(), shape);
|
||||
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.context(), A.dtype(), shape);
|
||||
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
|
||||
if(A_trans) res_root.lhs = A_root.lhs;
|
||||
if(B_trans) res_root.rhs = B_root.lhs;
|
||||
@@ -667,7 +668,7 @@ namespace detail
|
||||
bool A_trans = A_root.op.type==OPERATOR_TRANS_TYPE;
|
||||
if(A_trans)
|
||||
{
|
||||
array_expression tmp(A, repmat(x, 1, M), op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ELEMENT_PROD_TYPE), A.dtype(), size4(N, M));
|
||||
array_expression tmp(A, repmat(x, 1, M), op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ELEMENT_PROD_TYPE), A.context(), A.dtype(), size4(N, M));
|
||||
//Remove trans
|
||||
tmp.tree()[tmp.root()].lhs = A.tree()[A.root()].lhs;
|
||||
return sum(tmp, 1);
|
||||
|
@@ -75,8 +75,8 @@ array_expression::array_expression(LT const & lhs, RT const & rhs, op_element co
|
||||
}
|
||||
|
||||
template<class RT>
|
||||
array_expression::array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape) :
|
||||
tree_(lhs.tree_.size() + 1), root_(tree_.size()-1), context_(lhs.context_), dtype_(dtype), shape_(shape)
|
||||
array_expression::array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape) :
|
||||
tree_(lhs.tree_.size() + 1), root_(tree_.size()-1), context_(context), dtype_(dtype), shape_(shape)
|
||||
{
|
||||
std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin());
|
||||
fill(tree_[root_].lhs, lhs.root_);
|
||||
@@ -85,8 +85,8 @@ array_expression::array_expression(array_expression const & lhs, RT const & rhs,
|
||||
}
|
||||
|
||||
template<class LT>
|
||||
array_expression::array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape) :
|
||||
tree_(rhs.tree_.size() + 1), root_(tree_.size() - 1), context_(rhs.context_), dtype_(dtype), shape_(shape)
|
||||
array_expression::array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape) :
|
||||
tree_(rhs.tree_.size() + 1), root_(tree_.size() - 1), context_(context), dtype_(dtype), shape_(shape)
|
||||
{
|
||||
std::copy(rhs.tree_.begin(), rhs.tree_.end(), tree_.begin());
|
||||
fill(tree_[root_].lhs, lhs);
|
||||
@@ -94,8 +94,8 @@ array_expression::array_expression(LT const & lhs, array_expression const & rhs,
|
||||
fill(tree_[root_].rhs, rhs.root_);
|
||||
}
|
||||
|
||||
array_expression::array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape):
|
||||
tree_(lhs.tree_.size() + rhs.tree_.size() + 1), root_(tree_.size()-1), context_(lhs.context_), dtype_(dtype), shape_(shape)
|
||||
array_expression::array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape):
|
||||
tree_(lhs.tree_.size() + rhs.tree_.size() + 1), root_(tree_.size()-1), context_(context), dtype_(dtype), shape_(shape)
|
||||
{
|
||||
std::size_t lsize = lhs.tree_.size();
|
||||
std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin());
|
||||
@@ -110,15 +110,15 @@ array_expression::array_expression(array_expression const & lhs, array_expressio
|
||||
root_ = tree_.size() - 1;
|
||||
}
|
||||
|
||||
template array_expression::array_expression(array_expression const &, value_scalar const &, op_element const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(array_expression const &, invalid_node const &, op_element const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(array_expression const &, array const &, op_element const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(array_expression const &, repeat_infos const &, op_element const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(array_expression const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(array_expression const &, invalid_node const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(array_expression const &, array const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(array_expression const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||
|
||||
template array_expression::array_expression(value_scalar const &, array_expression const &, op_element const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(invalid_node const &, array_expression const &, op_element const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(array const &, array_expression const &, op_element const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(repeat_infos const &, array_expression const &, op_element const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(value_scalar const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(invalid_node const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(array const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(repeat_infos const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||
|
||||
template array_expression::array_expression(value_scalar const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(invalid_node const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||
@@ -140,6 +140,8 @@ template array_expression::array_expression(invalid_node const &, repeat_infos c
|
||||
template array_expression::array_expression(array const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||
template array_expression::array_expression(repeat_infos const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||
|
||||
|
||||
|
||||
array_expression::container_type & array_expression::tree()
|
||||
{ return tree_; }
|
||||
|
||||
@@ -169,10 +171,10 @@ array_expression& array_expression::reshape(int_t size1, int_t size2)
|
||||
}
|
||||
|
||||
array_expression array_expression::operator-()
|
||||
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), dtype_, shape_); }
|
||||
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array_expression array_expression::operator!()
|
||||
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), INT_TYPE, shape_); }
|
||||
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), context_, INT_TYPE, shape_); }
|
||||
|
||||
|
||||
//
|
||||
|
@@ -213,7 +213,7 @@ class ArgumentsHandler:
|
||||
self.blas3_size = map(int, self.blas3_size)
|
||||
|
||||
if __name__ == "__main__":
|
||||
atd.state.queue_properties = atd.queue_properties_type.CL_QUEUE_PROFILING_ENABLE
|
||||
atd.state.queue_properties = atd.CL_QUEUE_PROFILING_ENABLE
|
||||
|
||||
platforms = atd.get_platforms()
|
||||
devices = [d for platform in platforms for d in platform.get_devices()]
|
||||
|
@@ -220,11 +220,9 @@ def benchmark(template, symbolic):
|
||||
queue.models[template, atd.float32] = atd.model(template, queue)
|
||||
x = atd.array(symbolic)
|
||||
atd.synchronize(symbolic.context)
|
||||
current_time = 0
|
||||
timings = []
|
||||
x, event, cache = atd.flush(symbolic)
|
||||
x, events, cache = atd.flush(symbolic)
|
||||
atd.synchronize(symbolic.context)
|
||||
return 1e-9*(event.end - event.start)
|
||||
return 1e-9*sum([e.end - e.start for e in events])
|
||||
|
||||
|
||||
def sanitize_string(string, keep_chars = ['_']):
|
||||
|
@@ -317,13 +317,13 @@ namespace detail
|
||||
|
||||
bp::tuple flush(atd::array_expression const & expression, unsigned int queue_id, bp::list dependencies, int label, std::string const & program_name, bool force_recompile)
|
||||
{
|
||||
cl::Event event;
|
||||
std::list<cl::Event> events;
|
||||
atd::operation_cache cache;
|
||||
std::vector<cl::Event> cdependencies = to_vector<cl::Event>(dependencies);
|
||||
boost::shared_ptr<atd::array> parray(new atd::array(atd::control(expression, atd::execution_options_type(queue_id, &event, &cache, &cdependencies),
|
||||
boost::shared_ptr<atd::array> parray(new atd::array(atd::control(expression, atd::execution_options_type(queue_id, &events, &cache, &cdependencies),
|
||||
atd::dispatcher_options_type(label), atd::compilation_options_type(program_name, force_recompile))));
|
||||
|
||||
return bp::make_tuple(*parray, event, cache);
|
||||
return bp::make_tuple(*parray, to_list(events.begin(), events.end()), cache);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -404,16 +404,14 @@ void export_cl()
|
||||
|
||||
bp::def("flush", &detail::flush, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
|
||||
|
||||
bp::enum_<cl_command_queue_properties>("queue_properties_type")
|
||||
.value("CL_QUEUE_PROFILING_ENABLE", CL_QUEUE_PROFILING_ENABLE)
|
||||
.value("CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE", CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
|
||||
;
|
||||
|
||||
bp::class_<state_type>("state_type")
|
||||
.def_readwrite("queue_properties",&atd::cl_ext::queue_properties)
|
||||
;
|
||||
|
||||
bp::scope().attr("state") = bp::object(bp::ptr(&state));
|
||||
|
||||
bp::scope().attr("CL_QUEUE_PROFILING_ENABLE") = CL_QUEUE_PROFILING_ENABLE;
|
||||
bp::scope().attr("CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE") = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
|
||||
}
|
||||
|
||||
namespace detail
|
||||
|
Reference in New Issue
Block a user