Now using a list of events instead of a single one
This commit is contained in:
162
bench/blas.cpp
162
bench/blas.cpp
@@ -22,8 +22,25 @@ template<class T>
|
|||||||
void bench(ad::numeric_type dtype)
|
void bench(ad::numeric_type dtype)
|
||||||
{
|
{
|
||||||
unsigned int dtsize = ad::size_of(dtype);
|
unsigned int dtsize = ad::size_of(dtype);
|
||||||
|
cl::CommandQueue & queue = ad::cl_ext::queues[ad::cl_ext::default_context()][0];
|
||||||
|
|
||||||
#define BENCHMARK_OPENCL(OP, PERF) \
|
/* Repeatedly runs OP until the accumulated device time reaches 0.1 s, then
 * prints PERF, an expression that may use `t`, the median time of one run.
 *
 * OP must record the cl::Event of every kernel it launches in the local list
 * `events` (e.g. by passing &events through execution_options_type). The time
 * of one run is the sum over those events of (COMMAND_END - COMMAND_SUBMIT).
 * Relies on `queue` and `median` being available in the expanding scope.
 *
 * The accumulation is seeded with an unsigned 64-bit zero: std::accumulate
 * uses the type of its initial value as the accumulator type, so seeding it
 * with a plain `0` would narrow the profiling sum to int on every step. */
#define BENCHMARK_ATIDLAS(OP, PERF) \
  {\
  std::vector<long> times;\
  double total_time = 0;\
  while(total_time*1e-9 < 1e-1){\
    std::list<cl::Event> events;\
    OP;\
    queue.finish();\
    times.push_back(static_cast<long>(std::accumulate(events.begin(), events.end(), 0ULL, \
      [](unsigned long long sum, cl::Event const & e){ return sum + e.getProfilingInfo<CL_PROFILING_COMMAND_END>() - e.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>();})));\
    total_time+=times.back();\
  }\
  double t = median(times);\
  std::cout << " " << PERF << std::flush;\
  }
|
||||||
|
|
||||||
|
#define BENCHMARK_CLAMDBLAS(OP, PERF) \
|
||||||
{\
|
{\
|
||||||
std::vector<long> times;\
|
std::vector<long> times;\
|
||||||
double total_time = 0;\
|
double total_time = 0;\
|
||||||
@@ -69,41 +86,40 @@ void bench(ad::numeric_type dtype)
|
|||||||
std::cout << " " << PERF << std::flush;\
|
std::cout << " " << PERF << std::flush;\
|
||||||
}
|
}
|
||||||
|
|
||||||
// /*---------*/
|
/*---------*/
|
||||||
// /*--BLAS1--*/
|
/*--BLAS1--*/
|
||||||
// /*---------*/
|
/*---------*/
|
||||||
// std::cout << "#AXPY" << std::endl;
|
std::cout << "#AXPY" << std::endl;
|
||||||
// for(int_t N : create_log_range(1e3, 2e7, 50, 64))
|
for(int_t N : create_log_range(1e3, 2e7, 50, 64))
|
||||||
// {
|
{
|
||||||
// std::cout << N;
|
std::cout << N;
|
||||||
// ad::array x(N, dtype), y(N, dtype);
|
ad::array x(N, dtype), y(N, dtype);
|
||||||
// cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0];
|
/* ATIDLAS */
|
||||||
// /* ATIDLAS */
|
y = x + y; queue.flush(); queue.finish();
|
||||||
// y = x + y; queue.flush(); queue.finish();
|
BENCHMARK_ATIDLAS(y = ad::controller<atidlas::array_expression>(x + y, ad::execution_options_type(0, &events)), 3*N*dtsize/t)
|
||||||
// BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(x + y, ad::execution_options_type(0, &event)), 3*N*dtsize/t)
|
/* clAmdBlas */
|
||||||
// /* clAmdBlas */
|
#ifdef BENCH_CLAMDBLAS
|
||||||
//#ifdef BENCH_CLAMDBLAS
|
BENCHMARK_CLAMDBLAS(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, NULL, &event()), 3*N*dtsize/t)
|
||||||
// BENCHMARK_OPENCL(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, NULL, &event()), 3*N*dtsize/t)
|
#endif
|
||||||
//#endif
|
/* BLAS */
|
||||||
// /* BLAS */
|
#ifdef BENCH_CBLAS
|
||||||
//#ifdef BENCH_CBLAS
|
std::vector<float> cx(N), cy(N);
|
||||||
// std::vector<float> cx(N), cy(N);
|
ad::copy(x, cx);
|
||||||
// ad::copy(x, cx);
|
ad::copy(y, cy);
|
||||||
// ad::copy(y, cy);
|
BENCHMARK_HOST(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
|
||||||
// BENCHMARK_HOST(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
|
#endif
|
||||||
//#endif
|
/* CuBLAS */
|
||||||
// /* CuBLAS */
|
#ifdef BENCH_CUBLAS
|
||||||
//#ifdef BENCH_CUBLAS
|
T *cux, *cuy;
|
||||||
// T *cux, *cuy;
|
cudaMalloc((void**) &cux, N * sizeof(T));
|
||||||
// cudaMalloc((void**) &cux, N * sizeof(T));
|
cudaMalloc((void**) &cuy, N * sizeof(T));
|
||||||
// cudaMalloc((void**) &cuy, N * sizeof(T));
|
BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
|
||||||
// BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
|
cudaFree(cux);
|
||||||
// cudaFree(cux);
|
cudaFree(cuy);
|
||||||
// cudaFree(cuy);
|
#endif
|
||||||
//#endif
|
std::cout << std::endl;
|
||||||
// std::cout << std::endl;
|
}
|
||||||
// }
|
std::cout << "\n\n" << std::flush;
|
||||||
// std::cout << "\n\n" << std::flush;
|
|
||||||
|
|
||||||
// std::cout << "#DOT" << std::endl;
|
// std::cout << "#DOT" << std::endl;
|
||||||
// for(int_t N : create_log_range(1e3, 2e7, 50, 64))
|
// for(int_t N : create_log_range(1e3, 2e7, 50, 64))
|
||||||
@@ -111,7 +127,6 @@ void bench(ad::numeric_type dtype)
|
|||||||
// std::cout << N;
|
// std::cout << N;
|
||||||
// /* ATIDLAS */
|
// /* ATIDLAS */
|
||||||
// ad::array x(N, dtype), y(N, dtype);
|
// ad::array x(N, dtype), y(N, dtype);
|
||||||
// cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0];
|
|
||||||
// ad::array scratch(N, dtype);
|
// ad::array scratch(N, dtype);
|
||||||
// ad::scalar s(dtype);
|
// ad::scalar s(dtype);
|
||||||
// s = dot(x,y); queue.flush(); queue.finish();
|
// s = dot(x,y); queue.flush(); queue.finish();
|
||||||
@@ -140,43 +155,42 @@ void bench(ad::numeric_type dtype)
|
|||||||
// }
|
// }
|
||||||
// std::cout << "\n\n" << std::flush;
|
// std::cout << "\n\n" << std::flush;
|
||||||
|
|
||||||
/*---------*/
|
// /*---------*/
|
||||||
/*--BLAS2--*/
|
// /*--BLAS2--*/
|
||||||
/*---------*/
|
// /*---------*/
|
||||||
//T-layout
|
// //T-layout
|
||||||
std::cout << "#GEMV-T" << std::endl;
|
// std::cout << "#GEMV-T" << std::endl;
|
||||||
for(int_t N: std::vector<int>{64})
|
// for(int_t N: std::vector<int>{64})
|
||||||
for(int_t M: create_full_range(128, 10000, 64))
|
// for(int_t M: create_full_range(128, 10000, 64))
|
||||||
{
|
// {
|
||||||
std::cout << M << "," << N;
|
// std::cout << M << "," << N;
|
||||||
/* ATIDLAS */
|
// /* ATIDLAS */
|
||||||
ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
|
// ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
|
||||||
cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0];
|
// y = dot(trans(A),x); queue.flush(); queue.finish();
|
||||||
y = dot(trans(A),x); queue.flush(); queue.finish();
|
// BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(dot(trans(A),x), ad::execution_options_type(0, &event)),(M*N + M + N)*dtsize/t);
|
||||||
BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(dot(trans(A),x), ad::execution_options_type(0, &event)),(M*N + M + N)*dtsize/t);
|
// #ifdef BENCH_CLAMDBLAS
|
||||||
#ifdef BENCH_CLAMDBLAS
|
// BENCHMARK_OPENCL(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &queue(),0, NULL, &event()), (M*N + M + N)*dtsize/t)
|
||||||
BENCHMARK_OPENCL(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &queue(),0, NULL, &event()), (M*N + M + N)*dtsize/t)
|
// #endif
|
||||||
#endif
|
// #ifdef BENCH_CBLAS
|
||||||
#ifdef BENCH_CBLAS
|
// std::vector<float> cA(N*M), cx(N), cy(M);
|
||||||
std::vector<float> cA(N*M), cx(N), cy(M);
|
// ad::copy(x, cx);
|
||||||
ad::copy(x, cx);
|
// ad::copy(y, cy);
|
||||||
ad::copy(y, cy);
|
// ad::copy(A, cA);
|
||||||
ad::copy(A, cA);
|
// BENCHMARK_HOST(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t);
|
||||||
BENCHMARK_HOST(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t);
|
// #endif
|
||||||
#endif
|
// #ifdef BENCH_CUBLAS
|
||||||
#ifdef BENCH_CUBLAS
|
// T *cuA, *cux, *cuy;
|
||||||
T *cuA, *cux, *cuy;
|
// cudaMalloc((void**) &cuA, N * M * sizeof(T));
|
||||||
cudaMalloc((void**) &cuA, N * M * sizeof(T));
|
// cudaMalloc((void**) &cux, N * sizeof(T));
|
||||||
cudaMalloc((void**) &cux, N * sizeof(T));
|
// cudaMalloc((void**) &cuy, M * sizeof(T));
|
||||||
cudaMalloc((void**) &cuy, M * sizeof(T));
|
// BENCHMARK_CUDA(cublasSgemv(cublasTrans, N, M, 1, cuA, N, cux, 1, 0, cuy, 1), (M*N + M + N)*dtsize/t)
|
||||||
BENCHMARK_CUDA(cublasSgemv(cublasTrans, N, M, 1, cuA, N, cux, 1, 0, cuy, 1), (M*N + M + N)*dtsize/t)
|
// cudaFree(cuA);
|
||||||
cudaFree(cuA);
|
// cudaFree(cux);
|
||||||
cudaFree(cux);
|
// cudaFree(cuy);
|
||||||
cudaFree(cuy);
|
// #endif
|
||||||
#endif
|
// std::cout << std::endl;
|
||||||
std::cout << std::endl;
|
// }
|
||||||
}
|
// std::cout << "\n\n" << std::flush;
|
||||||
std::cout << "\n\n" << std::flush;
|
|
||||||
|
|
||||||
//// /*---------*/
|
//// /*---------*/
|
||||||
//// /*--BLAS3--*/
|
//// /*--BLAS3--*/
|
||||||
|
@@ -182,12 +182,12 @@ public:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
template<class LT, class RT>
|
template<class LT, class RT>
|
||||||
array_expression(LT const & lhs, RT const & rhs, op_element const & op, cl::Context const & ctx, numeric_type const & dtype, size4 const & shape);
|
array_expression(LT const & lhs, RT const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
|
||||||
template<class RT>
|
template<class RT>
|
||||||
array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape);
|
array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
|
||||||
template<class LT>
|
template<class LT>
|
||||||
array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape);
|
array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
|
||||||
array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape);
|
array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape);
|
||||||
|
|
||||||
size4 shape() const;
|
size4 shape() const;
|
||||||
array_expression& reshape(int_t size1, int_t size2=1);
|
array_expression& reshape(int_t size1, int_t size2=1);
|
||||||
@@ -218,17 +218,18 @@ class operation_cache
|
|||||||
cl::NDRange global;
|
cl::NDRange global;
|
||||||
cl::NDRange local;
|
cl::NDRange local;
|
||||||
std::vector<cl::Event>* dependencies;
|
std::vector<cl::Event>* dependencies;
|
||||||
cl::Event* event;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// Records one kernel launch (queue, kernel, ranges and dependency list) at
// the end of l_, so that enqueue() can submit it again later.
void push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies)
{
  l_.push_back({
    queue,
    kernel,
    offset,
    global,
    local,
    dependencies
  });
}
|
||||||
|
|
||||||
void enqueue()
|
void enqueue(std::list<cl::Event>* events = NULL)
|
||||||
{
|
{
|
||||||
for(infos & i : l_)
|
for(infos & i : l_){
|
||||||
i.queue.enqueueNDRangeKernel(i.kernel, i.offset, i.global, i.local, i.dependencies, i.event);
|
events->push_back(cl::Event());
|
||||||
|
i.queue.enqueueNDRangeKernel(i.kernel, i.offset, i.global, i.local, i.dependencies, &events->back());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@@ -237,17 +238,22 @@ private:
|
|||||||
|
|
||||||
struct execution_options_type
|
struct execution_options_type
|
||||||
{
|
{
|
||||||
execution_options_type(unsigned int _queue_id = 0, cl::Event* _event = NULL, operation_cache* _cache = NULL, std::vector<cl::Event>* _dependencies = NULL) : queue_id(_queue_id), event(_event), cache(_cache), dependencies(_dependencies){}
|
execution_options_type(unsigned int _queue_id = 0, std::list<cl::Event>* _events = NULL, operation_cache* _cache = NULL, std::vector<cl::Event>* _dependencies = NULL) : queue_id(_queue_id), events(_events), cache(_cache), dependencies(_dependencies){}
|
||||||
|
|
||||||
void enqueue_cache(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange offset, cl::NDRange global, cl::NDRange local) const
|
void enqueue_cache(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange offset, cl::NDRange global, cl::NDRange local) const
|
||||||
{
|
{
|
||||||
|
cl::Event* event = NULL;
|
||||||
|
if(events){
|
||||||
|
events->push_back(cl::Event());
|
||||||
|
event = &events->back();
|
||||||
|
}
|
||||||
queue.enqueueNDRangeKernel(kernel, offset, global, local, dependencies, event);
|
queue.enqueueNDRangeKernel(kernel, offset, global, local, dependencies, event);
|
||||||
if(cache)
|
if(cache)
|
||||||
cache->push_back(queue, kernel, cl::NullRange, global, local, dependencies, event);
|
cache->push_back(queue, kernel, cl::NullRange, global, local, dependencies);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int queue_id;
|
unsigned int queue_id;
|
||||||
cl::Event* event;
|
std::list<cl::Event>* events;
|
||||||
operation_cache* cache;
|
operation_cache* cache;
|
||||||
std::vector<cl::Event>* dependencies;
|
std::vector<cl::Event>* dependencies;
|
||||||
};
|
};
|
||||||
|
@@ -139,7 +139,8 @@ template<class TYPE>
|
|||||||
array& array::operator=(controller<TYPE> const & c)
|
array& array::operator=(controller<TYPE> const & c)
|
||||||
{
|
{
|
||||||
assert(dtype_ == c.x().dtype());
|
assert(dtype_ == c.x().dtype());
|
||||||
execute(controller<array_expression>(detail::assign(*this, c.x()), c.execution_options(), c.dispatcher_options(), c.compilation_options()),
|
array_expression expression(*this, c.x(), op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), context_, dtype_, shape_);
|
||||||
|
execute(controller<array_expression>(expression, c.execution_options(), c.dispatcher_options(), c.compilation_options()),
|
||||||
atidlas::get_model_map(cl_ext::queues[context_][c.execution_options().queue_id]));
|
atidlas::get_model_map(cl_ext::queues[context_][c.execution_options().queue_id]));
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
@@ -180,7 +181,7 @@ array & array::operator+=(array const & rhs)
|
|||||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype_, shape_); }
|
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype_, shape_); }
|
||||||
|
|
||||||
// In-place addition of an expression: builds the node (*this + rhs), using
// rhs's context and this array's dtype and shape, and assigns it to *this.
array & array::operator+=(array_expression const & rhs)
{
  op_element const add_op(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE);
  return *this = array_expression(*this, rhs, add_op, rhs.context(), dtype_, shape_);
}
|
||||||
//----
|
//----
|
||||||
// In-place subtraction of a scalar: builds the node (*this - rhs) with this
// array's context, dtype and shape, and assigns it to *this.
array & array::operator-=(value_scalar const & rhs)
{
  op_element const sub_op(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE);
  return *this = array_expression(*this, rhs, sub_op, context_, dtype_, shape_);
}
|
||||||
@@ -189,7 +190,7 @@ array & array::operator-=(array const & rhs)
|
|||||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
|
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
|
||||||
|
|
||||||
// In-place subtraction of an expression: builds the node (*this - rhs), using
// rhs's context and this array's dtype and shape, and assigns it to *this.
array & array::operator-=(array_expression const & rhs)
{
  op_element const sub_op(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE);
  return *this = array_expression(*this, rhs, sub_op, rhs.context(), dtype_, shape_);
}
|
||||||
//----
|
//----
|
||||||
// In-place multiplication by a scalar: builds the node (*this * rhs) with
// this array's context, dtype and shape, and assigns it to *this.
array & array::operator*=(value_scalar const & rhs)
{
  op_element const mult_op(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE);
  return *this = array_expression(*this, rhs, mult_op, context_, dtype_, shape_);
}
|
||||||
@@ -198,7 +199,7 @@ array & array::operator*=(array const & rhs)
|
|||||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); }
|
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); }
|
||||||
|
|
||||||
// In-place multiplication by an expression: builds the node (*this * rhs),
// using rhs's context and this array's dtype and shape, and assigns it to
// *this.
array & array::operator*=(array_expression const & rhs)
{
  op_element const mult_op(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE);
  return *this = array_expression(*this, rhs, mult_op, rhs.context(), dtype_, shape_);
}
|
||||||
//----
|
//----
|
||||||
// In-place division by a scalar: builds the node (*this / rhs) with this
// array's context, dtype and shape, and assigns it to *this.
array & array::operator/=(value_scalar const & rhs)
{
  op_element const div_op(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE);
  return *this = array_expression(*this, rhs, div_op, context_, dtype_, shape_);
}
|
||||||
@@ -207,7 +208,7 @@ array & array::operator/=(array const & rhs)
|
|||||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); }
|
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); }
|
||||||
|
|
||||||
// In-place division by an expression: builds the node (*this / rhs), using
// rhs's context and this array's dtype and shape, and assigns it to *this.
array & array::operator/=(array_expression const & rhs)
{
  op_element const div_op(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE);
  return *this = array_expression(*this, rhs, div_op, rhs.context(), dtype_, shape_);
}
|
||||||
|
|
||||||
// Member shorthand for atidlas::trans(*this).
array_expression array::T() const
{
  return atidlas::trans(*this);
}
|
||||||
@@ -394,28 +395,28 @@ bool check_elementwise(U const & u, V const & v)
|
|||||||
/* Defines the eight overloads of the element-wise binary function OPNAME for
 * every combination of array, array_expression and value_scalar operands
 * (at least one operand is always an array or an array_expression).
 *
 *   OP     operator type stored in the node (OPERATOR_BINARY_TYPE_FAMILY).
 *   OPNAME name of the generated functions.
 *   DTYPE  expression giving the result dtype; it is pasted into each body,
 *          so it is evaluated with the parameters x and y in scope.
 *
 * The array/expression overloads assert check_elementwise(x, y) and use
 * elementwise_size(x, y) as the result shape; the scalar overloads use
 * x.shape(). Every overload takes the result context from x.context(). */
#define DEFINE_ELEMENT_BINARY_OPERATOR(OP, OPNAME, DTYPE) \
array_expression OPNAME (array_expression const & x, array_expression const & y) \
{ \
  assert(check_elementwise(x, y));\
  return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); \
} \
\
array_expression OPNAME (array const & x, array_expression const & y) \
{ \
  assert(check_elementwise(x, y));\
  return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); \
} \
\
array_expression OPNAME (array_expression const & x, array const & y) \
{ \
  assert(check_elementwise(x, y));\
  return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); \
} \
\
array_expression OPNAME (array const & x, array const & y) \
{ \
  assert(check_elementwise(x, y));\
  return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, elementwise_size(x, y)); \
}\
\
array_expression OPNAME (array_expression const & x, value_scalar const & y) \
{ \
  return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); \
} \
\
array_expression OPNAME (array const & x, value_scalar const & y) \
{ \
  return array_expression(x, y, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); \
}\
\
array_expression OPNAME (value_scalar const & y, array_expression const & x) \
{ \
  return array_expression(y, x, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); \
} \
\
array_expression OPNAME (value_scalar const & y, array const & x) \
{ \
  return array_expression(y, x, op_element(OPERATOR_BINARY_TYPE_FAMILY, OP), x.context(), DTYPE, x.shape()); \
}
|
||||||
@@ -458,7 +459,7 @@ array_expression OPNAME (array const & x) \
|
|||||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.context(), x.dtype(), x.shape()); }\
|
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.context(), x.dtype(), x.shape()); }\
|
||||||
\
|
\
|
||||||
array_expression OPNAME (array_expression const & x) \
|
array_expression OPNAME (array_expression const & x) \
|
||||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.dtype(), x.shape()); }
|
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OP), x.context(), x.dtype(), x.shape()); }
|
||||||
|
|
||||||
DEFINE_ELEMENT_UNARY_OPERATOR((x.dtype()==FLOAT_TYPE || x.dtype()==DOUBLE_TYPE)?OPERATOR_FABS_TYPE:OPERATOR_ABS_TYPE, abs)
|
DEFINE_ELEMENT_UNARY_OPERATOR((x.dtype()==FLOAT_TYPE || x.dtype()==DOUBLE_TYPE)?OPERATOR_FABS_TYPE:OPERATOR_ABS_TYPE, abs)
|
||||||
DEFINE_ELEMENT_UNARY_OPERATOR(OPERATOR_ACOS_TYPE, acos)
|
DEFINE_ELEMENT_UNARY_OPERATOR(OPERATOR_ACOS_TYPE, acos)
|
||||||
@@ -506,7 +507,7 @@ array_expression cast(array const & x, numeric_type dtype)
|
|||||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, casted(dtype)), x.context(), dtype, x.shape()); }
|
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, casted(dtype)), x.context(), dtype, x.shape()); }
|
||||||
|
|
||||||
// Wraps the expression x in a unary node whose operator is casted(dtype).
// The result keeps x's context and shape and carries the requested dtype.
array_expression cast(array_expression const & x, numeric_type dtype)
{
  op_element const cast_op(OPERATOR_UNARY_TYPE_FAMILY, casted(dtype));
  return array_expression(x, invalid_node(), cast_op, x.context(), dtype, x.shape());
}
|
||||||
|
|
||||||
// Returns an M x N expression of type dtype in context ctx, built as an
// OPERATOR_VDIAG_TYPE node over the scalars 1 and 0.
atidlas::array_expression eye(std::size_t M, std::size_t N, atidlas::numeric_type dtype, cl::Context ctx)
{
  op_element const vdiag_op(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_VDIAG_TYPE);
  return array_expression(value_scalar(1), value_scalar(0), vdiag_op, ctx, dtype, size4(M, N));
}
|
||||||
@@ -524,7 +525,7 @@ array_expression trans(array const & x) \
|
|||||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), flip(x.shape())); }\
|
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), flip(x.shape())); }\
|
||||||
\
|
\
|
||||||
array_expression trans(array_expression const & x) \
|
array_expression trans(array_expression const & x) \
|
||||||
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.dtype(), flip(x.shape())); }
|
{ return array_expression(x, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), flip(x.shape())); }
|
||||||
|
|
||||||
array_expression repmat(array const & A, int_t const & rep1, int_t const & rep2)
|
array_expression repmat(array const & A, int_t const & rep1, int_t const & rep2)
|
||||||
{
|
{
|
||||||
@@ -543,7 +544,7 @@ array_expression repmat(array_expression const & A, int_t const & rep1, int_t co
|
|||||||
infos.rep2 = rep2;
|
infos.rep2 = rep2;
|
||||||
infos.sub1 = A.shape()._1;
|
infos.sub1 = A.shape()._1;
|
||||||
infos.sub2 = A.shape()._2;
|
infos.sub2 = A.shape()._2;
|
||||||
return array_expression(A, infos, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_REPEAT_TYPE), A.dtype(), size4(infos.rep1*infos.sub1, infos.rep2*infos.sub2));
|
return array_expression(A, infos, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_REPEAT_TYPE), A.context(), A.dtype(), size4(infos.rep1*infos.sub1, infos.rep2*infos.sub2));
|
||||||
}
|
}
|
||||||
|
|
||||||
////---------------------------------------
|
////---------------------------------------
|
||||||
@@ -568,11 +569,11 @@ array_expression OPNAME(array_expression const & x, int_t axis)\
|
|||||||
if(axis < -1 || axis > x.nshape())\
|
if(axis < -1 || axis > x.nshape())\
|
||||||
throw std::out_of_range("The axis entry is out of bounds");\
|
throw std::out_of_range("The axis entry is out of bounds");\
|
||||||
if(axis==-1)\
|
if(axis==-1)\
|
||||||
return array_expression(x, invalid_node(), op_element(OPERATOR_VECTOR_REDUCTION_TYPE_FAMILY, OP), x.dtype(), size4(1));\
|
return array_expression(x, invalid_node(), op_element(OPERATOR_VECTOR_REDUCTION_TYPE_FAMILY, OP), x.context(), x.dtype(), size4(1));\
|
||||||
else if(axis==0)\
|
else if(axis==0)\
|
||||||
return array_expression(x, invalid_node(), op_element(OPERATOR_ROWS_REDUCTION_TYPE_FAMILY, OP), x.dtype(), size4(x.shape()._1));\
|
return array_expression(x, invalid_node(), op_element(OPERATOR_ROWS_REDUCTION_TYPE_FAMILY, OP), x.context(), x.dtype(), size4(x.shape()._1));\
|
||||||
else\
|
else\
|
||||||
return array_expression(x, invalid_node(), op_element(OPERATOR_COLUMNS_REDUCTION_TYPE_FAMILY, OP), x.dtype(), size4(x.shape()._2));\
|
return array_expression(x, invalid_node(), op_element(OPERATOR_COLUMNS_REDUCTION_TYPE_FAMILY, OP), x.context(), x.dtype(), size4(x.shape()._2));\
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_REDUCTION(OPERATOR_ADD_TYPE, sum)
|
DEFINE_REDUCTION(OPERATOR_ADD_TYPE, sum)
|
||||||
@@ -604,7 +605,7 @@ namespace detail
|
|||||||
shape._1 = A.shape()._2;
|
shape._1 = A.shape()._2;
|
||||||
}
|
}
|
||||||
|
|
||||||
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.dtype(), shape);
|
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.context(), A.dtype(), shape);
|
||||||
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
|
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
|
||||||
if(A_trans) res_root.lhs = A_root.lhs;
|
if(A_trans) res_root.lhs = A_root.lhs;
|
||||||
return res;
|
return res;
|
||||||
@@ -621,7 +622,7 @@ namespace detail
|
|||||||
type = OPERATOR_MATRIX_PRODUCT_NT_TYPE;
|
type = OPERATOR_MATRIX_PRODUCT_NT_TYPE;
|
||||||
shape._2 = B.shape()._1;
|
shape._2 = B.shape()._1;
|
||||||
}
|
}
|
||||||
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.dtype(), shape);
|
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.context(), A.dtype(), shape);
|
||||||
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
|
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
|
||||||
if(B_trans) res_root.rhs = B_root.lhs;
|
if(B_trans) res_root.rhs = B_root.lhs;
|
||||||
return res;
|
return res;
|
||||||
@@ -643,7 +644,7 @@ namespace detail
|
|||||||
else if(!A_trans && B_trans) type = OPERATOR_MATRIX_PRODUCT_NT_TYPE;
|
else if(!A_trans && B_trans) type = OPERATOR_MATRIX_PRODUCT_NT_TYPE;
|
||||||
else type = OPERATOR_MATRIX_PRODUCT_NN_TYPE;
|
else type = OPERATOR_MATRIX_PRODUCT_NN_TYPE;
|
||||||
|
|
||||||
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.dtype(), shape);
|
array_expression res(A, B, op_element(OPERATOR_MATRIX_PRODUCT_TYPE_FAMILY, type), A.context(), A.dtype(), shape);
|
||||||
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
|
array_expression::node & res_root = const_cast<array_expression::node &>(res.tree()[res.root()]);
|
||||||
if(A_trans) res_root.lhs = A_root.lhs;
|
if(A_trans) res_root.lhs = A_root.lhs;
|
||||||
if(B_trans) res_root.rhs = B_root.lhs;
|
if(B_trans) res_root.rhs = B_root.lhs;
|
||||||
@@ -667,7 +668,7 @@ namespace detail
|
|||||||
bool A_trans = A_root.op.type==OPERATOR_TRANS_TYPE;
|
bool A_trans = A_root.op.type==OPERATOR_TRANS_TYPE;
|
||||||
if(A_trans)
|
if(A_trans)
|
||||||
{
|
{
|
||||||
array_expression tmp(A, repmat(x, 1, M), op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ELEMENT_PROD_TYPE), A.dtype(), size4(N, M));
|
array_expression tmp(A, repmat(x, 1, M), op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ELEMENT_PROD_TYPE), A.context(), A.dtype(), size4(N, M));
|
||||||
//Remove trans
|
//Remove trans
|
||||||
tmp.tree()[tmp.root()].lhs = A.tree()[A.root()].lhs;
|
tmp.tree()[tmp.root()].lhs = A.tree()[A.root()].lhs;
|
||||||
return sum(tmp, 1);
|
return sum(tmp, 1);
|
||||||
|
@@ -75,8 +75,8 @@ array_expression::array_expression(LT const & lhs, RT const & rhs, op_element co
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class RT>
|
template<class RT>
|
||||||
array_expression::array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape) :
|
array_expression::array_expression(array_expression const & lhs, RT const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape) :
|
||||||
tree_(lhs.tree_.size() + 1), root_(tree_.size()-1), context_(lhs.context_), dtype_(dtype), shape_(shape)
|
tree_(lhs.tree_.size() + 1), root_(tree_.size()-1), context_(context), dtype_(dtype), shape_(shape)
|
||||||
{
|
{
|
||||||
std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin());
|
std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin());
|
||||||
fill(tree_[root_].lhs, lhs.root_);
|
fill(tree_[root_].lhs, lhs.root_);
|
||||||
@@ -85,8 +85,8 @@ array_expression::array_expression(array_expression const & lhs, RT const & rhs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class LT>
|
template<class LT>
|
||||||
array_expression::array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape) :
|
array_expression::array_expression(LT const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape) :
|
||||||
tree_(rhs.tree_.size() + 1), root_(tree_.size() - 1), context_(rhs.context_), dtype_(dtype), shape_(shape)
|
tree_(rhs.tree_.size() + 1), root_(tree_.size() - 1), context_(context), dtype_(dtype), shape_(shape)
|
||||||
{
|
{
|
||||||
std::copy(rhs.tree_.begin(), rhs.tree_.end(), tree_.begin());
|
std::copy(rhs.tree_.begin(), rhs.tree_.end(), tree_.begin());
|
||||||
fill(tree_[root_].lhs, lhs);
|
fill(tree_[root_].lhs, lhs);
|
||||||
@@ -94,8 +94,8 @@ array_expression::array_expression(LT const & lhs, array_expression const & rhs,
|
|||||||
fill(tree_[root_].rhs, rhs.root_);
|
fill(tree_[root_].rhs, rhs.root_);
|
||||||
}
|
}
|
||||||
|
|
||||||
array_expression::array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, numeric_type const & dtype, size4 const & shape):
|
array_expression::array_expression(array_expression const & lhs, array_expression const & rhs, op_element const & op, cl::Context const & context, numeric_type const & dtype, size4 const & shape):
|
||||||
tree_(lhs.tree_.size() + rhs.tree_.size() + 1), root_(tree_.size()-1), context_(lhs.context_), dtype_(dtype), shape_(shape)
|
tree_(lhs.tree_.size() + rhs.tree_.size() + 1), root_(tree_.size()-1), context_(context), dtype_(dtype), shape_(shape)
|
||||||
{
|
{
|
||||||
std::size_t lsize = lhs.tree_.size();
|
std::size_t lsize = lhs.tree_.size();
|
||||||
std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin());
|
std::copy(lhs.tree_.begin(), lhs.tree_.end(), tree_.begin());
|
||||||
@@ -110,15 +110,15 @@ array_expression::array_expression(array_expression const & lhs, array_expressio
|
|||||||
root_ = tree_.size() - 1;
|
root_ = tree_.size() - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
template array_expression::array_expression(array_expression const &, value_scalar const &, op_element const &, numeric_type const &, size4 const &);
|
template array_expression::array_expression(array_expression const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||||
template array_expression::array_expression(array_expression const &, invalid_node const &, op_element const &, numeric_type const &, size4 const &);
|
template array_expression::array_expression(array_expression const &, invalid_node const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||||
template array_expression::array_expression(array_expression const &, array const &, op_element const &, numeric_type const &, size4 const &);
|
template array_expression::array_expression(array_expression const &, array const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||||
template array_expression::array_expression(array_expression const &, repeat_infos const &, op_element const &, numeric_type const &, size4 const &);
|
template array_expression::array_expression(array_expression const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||||
|
|
||||||
template array_expression::array_expression(value_scalar const &, array_expression const &, op_element const &, numeric_type const &, size4 const &);
|
template array_expression::array_expression(value_scalar const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||||
template array_expression::array_expression(invalid_node const &, array_expression const &, op_element const &, numeric_type const &, size4 const &);
|
template array_expression::array_expression(invalid_node const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||||
template array_expression::array_expression(array const &, array_expression const &, op_element const &, numeric_type const &, size4 const &);
|
template array_expression::array_expression(array const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||||
template array_expression::array_expression(repeat_infos const &, array_expression const &, op_element const &, numeric_type const &, size4 const &);
|
template array_expression::array_expression(repeat_infos const &, array_expression const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||||
|
|
||||||
template array_expression::array_expression(value_scalar const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
template array_expression::array_expression(value_scalar const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||||
template array_expression::array_expression(invalid_node const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
template array_expression::array_expression(invalid_node const &, value_scalar const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||||
@@ -140,6 +140,8 @@ template array_expression::array_expression(invalid_node const &, repeat_infos c
|
|||||||
template array_expression::array_expression(array const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
template array_expression::array_expression(array const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||||
template array_expression::array_expression(repeat_infos const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
template array_expression::array_expression(repeat_infos const &, repeat_infos const &, op_element const &, cl::Context const &, numeric_type const &, size4 const &);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
array_expression::container_type & array_expression::tree()
|
array_expression::container_type & array_expression::tree()
|
||||||
{ return tree_; }
|
{ return tree_; }
|
||||||
|
|
||||||
@@ -169,10 +171,10 @@ array_expression& array_expression::reshape(int_t size1, int_t size2)
|
|||||||
}
|
}
|
||||||
|
|
||||||
array_expression array_expression::operator-()
|
array_expression array_expression::operator-()
|
||||||
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), dtype_, shape_); }
|
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
|
||||||
|
|
||||||
array_expression array_expression::operator!()
|
array_expression array_expression::operator!()
|
||||||
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), INT_TYPE, shape_); }
|
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), context_, INT_TYPE, shape_); }
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@@ -213,7 +213,7 @@ class ArgumentsHandler:
|
|||||||
self.blas3_size = map(int, self.blas3_size)
|
self.blas3_size = map(int, self.blas3_size)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
atd.state.queue_properties = atd.queue_properties_type.CL_QUEUE_PROFILING_ENABLE
|
atd.state.queue_properties = atd.CL_QUEUE_PROFILING_ENABLE
|
||||||
|
|
||||||
platforms = atd.get_platforms()
|
platforms = atd.get_platforms()
|
||||||
devices = [d for platform in platforms for d in platform.get_devices()]
|
devices = [d for platform in platforms for d in platform.get_devices()]
|
||||||
|
@@ -220,11 +220,9 @@ def benchmark(template, symbolic):
|
|||||||
queue.models[template, atd.float32] = atd.model(template, queue)
|
queue.models[template, atd.float32] = atd.model(template, queue)
|
||||||
x = atd.array(symbolic)
|
x = atd.array(symbolic)
|
||||||
atd.synchronize(symbolic.context)
|
atd.synchronize(symbolic.context)
|
||||||
current_time = 0
|
x, events, cache = atd.flush(symbolic)
|
||||||
timings = []
|
|
||||||
x, event, cache = atd.flush(symbolic)
|
|
||||||
atd.synchronize(symbolic.context)
|
atd.synchronize(symbolic.context)
|
||||||
return 1e-9*(event.end - event.start)
|
return 1e-9*sum([e.end - e.start for e in events])
|
||||||
|
|
||||||
|
|
||||||
def sanitize_string(string, keep_chars = ['_']):
|
def sanitize_string(string, keep_chars = ['_']):
|
||||||
|
@@ -317,13 +317,13 @@ namespace detail
|
|||||||
|
|
||||||
bp::tuple flush(atd::array_expression const & expression, unsigned int queue_id, bp::list dependencies, int label, std::string const & program_name, bool force_recompile)
|
bp::tuple flush(atd::array_expression const & expression, unsigned int queue_id, bp::list dependencies, int label, std::string const & program_name, bool force_recompile)
|
||||||
{
|
{
|
||||||
cl::Event event;
|
std::list<cl::Event> events;
|
||||||
atd::operation_cache cache;
|
atd::operation_cache cache;
|
||||||
std::vector<cl::Event> cdependencies = to_vector<cl::Event>(dependencies);
|
std::vector<cl::Event> cdependencies = to_vector<cl::Event>(dependencies);
|
||||||
boost::shared_ptr<atd::array> parray(new atd::array(atd::control(expression, atd::execution_options_type(queue_id, &event, &cache, &cdependencies),
|
boost::shared_ptr<atd::array> parray(new atd::array(atd::control(expression, atd::execution_options_type(queue_id, &events, &cache, &cdependencies),
|
||||||
atd::dispatcher_options_type(label), atd::compilation_options_type(program_name, force_recompile))));
|
atd::dispatcher_options_type(label), atd::compilation_options_type(program_name, force_recompile))));
|
||||||
|
|
||||||
return bp::make_tuple(*parray, event, cache);
|
return bp::make_tuple(*parray, to_list(events.begin(), events.end()), cache);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -404,16 +404,14 @@ void export_cl()
|
|||||||
|
|
||||||
bp::def("flush", &detail::flush, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
|
bp::def("flush", &detail::flush, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
|
||||||
|
|
||||||
bp::enum_<cl_command_queue_properties>("queue_properties_type")
|
|
||||||
.value("CL_QUEUE_PROFILING_ENABLE", CL_QUEUE_PROFILING_ENABLE)
|
|
||||||
.value("CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE", CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
|
|
||||||
;
|
|
||||||
|
|
||||||
bp::class_<state_type>("state_type")
|
bp::class_<state_type>("state_type")
|
||||||
.def_readwrite("queue_properties",&atd::cl_ext::queue_properties)
|
.def_readwrite("queue_properties",&atd::cl_ext::queue_properties)
|
||||||
;
|
;
|
||||||
|
|
||||||
bp::scope().attr("state") = bp::object(bp::ptr(&state));
|
bp::scope().attr("state") = bp::object(bp::ptr(&state));
|
||||||
|
|
||||||
|
bp::scope().attr("CL_QUEUE_PROFILING_ENABLE") = CL_QUEUE_PROFILING_ENABLE;
|
||||||
|
bp::scope().attr("CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE") = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace detail
|
namespace detail
|
||||||
|
Reference in New Issue
Block a user