API: clearer interface for transposition

This commit is contained in:
Philippe Tillet
2015-10-01 17:23:26 -04:00
parent feeb1e9862
commit 1e076c131b
5 changed files with 50 additions and 32 deletions

View File

@@ -234,8 +234,8 @@ void bench(sc::numeric_type dtype, std::string operation)
#ifdef HAS_A_BLAS
int_t lda = A.ld();
#endif
BENCHMARK_ISAAC(y = sc::execution_handler(AT?dot(A.T(),x):dot(A,x), sc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t);
BENCHMARK_ISAAC(y = sc::execution_handler(AT?dot(A.T(),x):dot(A,x), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)),(M*N + M + N)*dtsize/t);
BENCHMARK_ISAAC(y = sc::execution_handler(AT?dot(A.T,x):dot(A,x), sc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t);
BENCHMARK_ISAAC(y = sc::execution_handler(AT?dot(A.T,x):dot(A,x), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)),(M*N + M + N)*dtsize/t);
#ifdef BENCH_CLBLAS
if(y.context().backend()==sc::driver::OPENCL)
BENCHMARK_CLBLAS(clblasSgemv(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, As1, As2, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(x.data()), 0, 1, 0, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue),0, NULL, &event), (M*N + M + N)*dtsize/t)
@@ -317,8 +317,8 @@ void bench(sc::numeric_type dtype, std::string operation)
#ifdef HAS_A_BLAS
int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld();
#endif
BENCHMARK_ISAAC(C = sc::execution_handler(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(false)), (double)2*M*N*K/t);
BENCHMARK_ISAAC(C = sc::execution_handler(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)), (double)2*M*N*K/t);
BENCHMARK_ISAAC(C = sc::execution_handler(AT?(BT?dot(A.T,B.T):dot(A.T,B)):(BT?dot(A,B.T):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(false)), (double)2*M*N*K/t);
BENCHMARK_ISAAC(C = sc::execution_handler(AT?(BT?dot(A.T,B.T):dot(A.T,B)):(BT?dot(A,B.T):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)), (double)2*M*N*K/t);
/* clblas */
#ifdef BENCH_CLBLAS
if(C.context().backend()==sc::driver::OPENCL)

View File

@@ -91,7 +91,7 @@ public:
array operator[](slice const &);
array operator()(slice const &, slice const &);
math_expression T() const;
protected:
numeric_type dtype_;
@@ -102,6 +102,9 @@ protected:
driver::Context context_;
driver::Buffer data_;
public:
math_expression T;
};
class ISAACAPI scalar : public array

View File

@@ -24,22 +24,27 @@ namespace isaac
// 1-D constructor: shape (shape0, 1, 1, 1), zero start offsets, unit strides, ld_ = shape_[0].
// data_ is constructed from the context and size_of(dtype)*dsize() (presumably a byte count -- confirm).
// Diff note: +/- markers are stripped in this listing. The bare `context_(...), data_(...)` line is
// the pre-commit initializer; the two lines after it are its replacement, which additionally
// initialises the member T with isaac::trans(*this).
array::array(int_t shape0, numeric_type dtype, driver::Context const & context) :
dtype_(dtype), shape_(shape0, 1, 1, 1), start_(0, 0, 0, 0), stride_(1, 1, 1, 1), ld_(shape_[0]),
context_(context), data_(context_, size_of(dtype)*dsize())
context_(context), data_(context_, size_of(dtype)*dsize()),
T(isaac::trans(*this))
{ }
// 1-D constructor over a caller-supplied buffer: data_ is initialised from `data`, context_ from
// data.context(), start_ from (start, 0, 0, 0), stride_ from `inc`, and ld_ from shape_[0].
// Diff note: +/- markers are stripped in this listing. The first initializer line is the
// pre-commit version; the two lines after it replace it and add T(isaac::trans(*this)).
array::array(int_t shape0, numeric_type dtype, driver::Buffer data, int_t start, int_t inc):
dtype_(dtype), shape_(shape0), start_(start, 0, 0, 0), stride_(inc), ld_(shape_[0]), context_(data.context()), data_(data)
dtype_(dtype), shape_(shape0), start_(start, 0, 0, 0), stride_(inc), ld_(shape_[0]), context_(data.context()), data_(data),
T(isaac::trans(*this))
{ }
// 1-D constructor from a host std::vector<DT>: dtype_ comes from to_numeric_type<DT>::value,
// the shape is (x.size(), 1), and the body fills the array through `*this = x`.
// Diff note: +/- markers are stripped in this listing. The bare `context_(...), data_(...)` line
// is the pre-commit initializer; the two lines after it replace it and add T(isaac::trans(*this)).
template<class DT>
array::array(std::vector<DT> const & x, driver::Context const & context):
dtype_(to_numeric_type<DT>::value), shape_((int_t)x.size(), 1), start_(0, 0, 0, 0), stride_(1, 1, 1, 1), ld_(shape_[0]),
context_(context), data_(context, size_of(dtype_)*dsize())
context_(context), data_(context, size_of(dtype_)*dsize()),
T(isaac::trans(*this))
{ *this = x; }
// 1-D slice constructor: describes the elements of `v` selected by `s0`.
//   start_[0]  = v.start_[0] + v.stride_[0]*s0.start
//   stride_[0] = v.stride_[0]*s0.stride
//   shape_     = (s0.size, 1, 1, 1); ld_, context and data_ are taken from `v`.
// data_ is initialised from v.data_ (presumably sharing the same underlying storage -- confirm
// against driver::Buffer's copy semantics).
// Diff note: +/- markers are stripped in this listing. The first two lines are the pre-commit
// signature/initializers; the four lines after them are the reformatted replacement, which also
// adds T(isaac::trans(*this)).
array::array(array & v, slice const & s0) : dtype_(v.dtype_), shape_(s0.size, 1, 1, 1), start_(v.start_[0] + v.stride_[0]*s0.start, 0, 0, 0), stride_(v.stride_[0]*s0.stride, 1, 1, 1),
ld_(v.ld_), context_(v.context()), data_(v.data_)
array::array(array & v, slice const & s0) :
dtype_(v.dtype_), shape_(s0.size, 1, 1, 1), start_(v.start_[0] + v.stride_[0]*s0.start, 0, 0, 0), stride_(v.stride_[0]*s0.stride, 1, 1, 1),
ld_(v.ld_), context_(v.context()), data_(v.data_),
T(isaac::trans(*this))
{}
#define INSTANTIATE(T) template ISAACAPI array::array(std::vector<T> const &, driver::Context const &)
@@ -58,18 +63,24 @@ INSTANTIATE(double);
#undef INSTANTIATE
// 2D
// 2-D constructor: shape (shape0, shape1), zero start offsets, unit strides, ld_ = shape0.
// data_ is constructed from the context and size_of(dtype_)*dsize().
// Diff note: +/- markers are stripped in this listing. The first two lines are the pre-commit
// signature/initializers; the four lines after them are the reformatted replacement, which also
// adds T(isaac::trans(*this)).
array::array(int_t shape0, int_t shape1, numeric_type dtype, driver::Context const & context) : dtype_(dtype), shape_(shape0, shape1), start_(0, 0, 0, 0), stride_(1, 1, 1, 1), ld_(shape0),
context_(context), data_(context_, size_of(dtype_)*dsize())
array::array(int_t shape0, int_t shape1, numeric_type dtype, driver::Context const & context) :
dtype_(dtype), shape_(shape0, shape1), start_(0, 0, 0, 0), stride_(1, 1, 1, 1), ld_(shape0),
context_(context), data_(context_, size_of(dtype_)*dsize()),
T(isaac::trans(*this))
{}
// 2-D constructor over a caller-supplied buffer: data_ is initialised from `data`, context_ from
// data.context(), start_ from (start, 0, 0, 0), strides are 1, and ld_ is the caller's `ld`.
// Diff note: +/- markers are stripped in this listing. The first initializer line is the
// pre-commit version; the three lines after it replace it and add T(isaac::trans(*this)).
array::array(int_t shape0, int_t shape1, numeric_type dtype, driver::Buffer data, int_t start, int_t ld) :
dtype_(dtype), shape_(shape0, shape1), start_(start, 0, 0, 0), stride_(1, 1, 1, 1), ld_(ld), context_(data.context()), data_(data)
dtype_(dtype), shape_(shape0, shape1), start_(start, 0, 0, 0), stride_(1, 1, 1, 1),
ld_(ld), context_(data.context()), data_(data),
T(isaac::trans(*this))
{ }
// 2-D slice constructor: describes the sub-block of `M` selected by `s0` (axis 0) and `s1` (axis 1).
//   start_[i]  = M.start_[i] + M.stride_[i]*s_i.start
//   stride_[i] = M.stride_[i]*s_i.stride
//   shape_     = (s0.size, s1.size, 1, 1); ld_ is M.ld_, context_ is M.data_.context().
// data_ is initialised from M.data_ (presumably sharing the same underlying storage -- confirm
// against driver::Buffer's copy semantics).
// Diff note: +/- markers are stripped in this listing. The first four lines are the pre-commit
// version; the six lines after them are the reformatted replacement, which also adds
// T(isaac::trans(*this)).
array::array(array & M, slice const & s0, slice const & s1) : dtype_(M.dtype_), shape_(s0.size, s1.size, 1, 1),
start_(M.start_[0] + M.stride_[0]*s0.start, M.start_[1] + M.stride_[1]*s1.start, 0, 0),
stride_(M.stride_[0]*s0.stride, M.stride_[1]*s1.stride, 1, 1), ld_(M.ld_),
context_(M.data_.context()), data_(M.data_)
array::array(array & M, slice const & s0, slice const & s1) :
dtype_(M.dtype_), shape_(s0.size, s1.size, 1, 1),
start_(M.start_[0] + M.stride_[0]*s0.start, M.start_[1] + M.stride_[1]*s1.start, 0, 0),
stride_(M.stride_[0]*s0.stride, M.stride_[1]*s1.stride, 1, 1), ld_(M.ld_),
context_(M.data_.context()), data_(M.data_),
T(isaac::trans(*this))
{ }
@@ -77,20 +88,24 @@ template<typename DT>
array::array(int_t shape0, int_t shape1, std::vector<DT> const & data, driver::Context const & context)
: dtype_(to_numeric_type<DT>::value),
shape_(shape0, shape1), start_(0, 0), stride_(1, 1), ld_(shape0),
context_(context), data_(context_, size_of(dtype_)*dsize())
context_(context), data_(context_, size_of(dtype_)*dsize()),
T(isaac::trans(*this))
{
isaac::copy(data, *this);
}
// 3D
// 3-D constructor: shape (shape0, shape1, shape2, 1), zero start offsets, unit strides,
// ld_ = shape0. data_ is constructed from the context and size_of(dtype_)*dsize().
// Diff note: +/- markers are stripped in this listing. The first two lines are the pre-commit
// signature/initializers; the four lines after them are the reformatted replacement, which also
// adds T(isaac::trans(*this)).
array::array(int_t shape0, int_t shape1, int_t shape2, numeric_type dtype, driver::Context const & context) : dtype_(dtype), shape_(shape0, shape1, shape2, 1), start_(0, 0, 0, 0), stride_(1, 1, 1, 1), ld_(shape0),
context_(context), data_(context_, size_of(dtype_)*dsize())
array::array(int_t shape0, int_t shape1, int_t shape2, numeric_type dtype, driver::Context const & context) :
dtype_(dtype), shape_(shape0, shape1, shape2, 1), start_(0, 0, 0, 0), stride_(1, 1, 1, 1), ld_(shape0),
context_(context), data_(context_, size_of(dtype_)*dsize()),
T(isaac::trans(*this))
{}
//Slices
// Constructor over a caller-supplied buffer described by two slices: shape, start and stride
// come from the size/start/stride fields of `s0` and `s1`; ld_ is the caller's `ld`;
// context_ is data.context() and data_ is initialised from `data`.
// Diff note: +/- markers are stripped in this listing. The bare `ld_(...), ..., data_(data)` line
// is the pre-commit initializer; the two lines after it replace it and add T(isaac::trans(*this)).
array::array(numeric_type dtype, driver::Buffer data, slice const & s0, slice const & s1, int_t ld):
dtype_(dtype), shape_(s0.size, s1.size), start_(s0.start, s1.start), stride_(s0.stride, s1.stride),
ld_(ld), context_(data.context()), data_(data)
ld_(ld), context_(data.context()), data_(data),
T(isaac::trans(*this))
{ }
@@ -112,9 +127,11 @@ INSTANTIATE(double);
// Constructs an array from a math_expression by wrapping `proxy` in an execution_handler and
// delegating to the execution_handler constructor.
array::array(math_expression const & proxy) : array(execution_handler(proxy)){}
// Copy constructor: takes dtype, shape and context from `other`, but resets start_ to (0, 0),
// stride_ to (1, 1) and ld_ to shape_[0], and constructs its own data_ from the context and
// size_of(dtype_)*dsize(). The contents are then filled in by `*this = other` in the body.
// T is rebuilt from *this (isaac::trans(*this)) rather than taken from other.T.
// Diff note: +/- markers are stripped in this listing. The first line is the pre-commit
// signature, followed by its two-line replacement; likewise the bare `context_(...), data_(...)`
// line is followed by its replacement, which adds the T initializer.
array::array(array const & other): dtype_(other.dtype()),
array::array(array const & other):
dtype_(other.dtype()),
shape_(other.shape()), start_(0,0), stride_(1, 1), ld_(shape_[0]),
context_(other.context()), data_(context_, size_of(dtype_)*dsize())
context_(other.context()), data_(context_, size_of(dtype_)*dsize()),
T(isaac::trans(*this))
{
*this = other;
}
@@ -122,7 +139,8 @@ array::array(array const & other): dtype_(other.dtype()),
// Constructs an array from an execution_handler: dtype, shape and context are read from
// other.x(); start_ is (0, 0), stride_ is (1, 1), ld_ is shape_[0]; data_ is constructed from the
// context and size_of(dtype_)*dsize(). The body then assigns `other` to *this.
// Diff note: +/- markers are stripped in this listing. The bare `context_(...), data_(...)` line
// is the pre-commit initializer; the two lines after it replace it and add T(isaac::trans(*this)).
array::array(execution_handler const & other) :
dtype_(other.x().dtype()),
shape_(other.x().shape()), start_(0,0), stride_(1, 1), ld_(shape_[0]),
context_(other.x().context()), data_(context_, size_of(dtype_)*dsize())
context_(other.x().context()), data_(context_, size_of(dtype_)*dsize()),
T(isaac::trans(*this))
{
*this = other;
}
@@ -266,9 +284,6 @@ array & array::operator/=(array const & rhs)
// Compound division by an expression: builds a math_expression from (*this, rhs) tagged with
// op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), using rhs.context(), dtype_ and
// shape_, assigns it to *this and returns the result of that assignment.
array & array::operator/=(math_expression const & rhs)
{ return *this = math_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), rhs.context(), dtype_, shape_); }
// Transposition accessor: returns isaac::trans(*this) as a math_expression.
// Diff note: +/- markers are stripped in this listing. These two lines are the removed side of
// the hunk (the hunk shrinks from 9 to 6 lines and nothing replaces them here); elsewhere in this
// commit the class gains a public `math_expression T;` data member, initialised in each
// constructor with the same isaac::trans(*this) expression, and call sites change from
// `A.T()` to `A.T`.
math_expression array::T() const
{ return isaac::trans(*this) ;}
/*--- Indexing operators -----*/
//---------------------------------------
math_expression array::operator[](for_idx_t idx) const

View File

@@ -156,7 +156,7 @@ extern "C"
\
sc::driver::Context const & context = A.context();\
if(transA==clblasTrans)\
execute(sc::assign(y, alpha*dot(A.T(), x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
execute(sc::assign(y, alpha*dot(A.T, x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
else\
execute(sc::assign(y, alpha*dot(A, x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
return clblasSuccess;\
@@ -198,11 +198,11 @@ extern "C"
sc::driver::Context const & context = C.context();\
/*Operation*/\
if((transA==clblasTrans) && (transB==clblasTrans))\
execute(sc::assign(C, alpha*dot(A.T(), B.T()) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
execute(sc::assign(C, alpha*dot(A.T, B.T) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
else if((transA==clblasTrans) && (transB==clblasNoTrans))\
execute(sc::assign(C, alpha*dot(A.T(), B) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
execute(sc::assign(C, alpha*dot(A.T, B) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
else if((transA==clblasNoTrans) && (transB==clblasTrans))\
execute(sc::assign(C, alpha*dot(A, B.T()) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
execute(sc::assign(C, alpha*dot(A, B.T) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
else\
execute(sc::assign(C, alpha*dot(A, B) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
return clblasSuccess;\

View File

@@ -73,7 +73,7 @@ def main():
libraries += ['gnustl_shared']
#Source files
src = 'src/lib/value_scalar.cpp src/lib/wrap/clBLAS.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/profiles/profiles.cpp src/lib/profiles/presets.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/handle.cpp src/lib/driver/dispatch.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/program_cache.cpp src/lib/driver/command_queue.cpp src/lib/driver/buffer.cpp src/lib/driver/backend.cpp src/lib/driver/platform.cpp src/lib/driver/program.cpp src/lib/driver/kernel.cpp src/lib/driver/device.cpp src/lib/driver/check.cpp src/lib/driver/context.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/io.cpp src/lib/array.cpp src/lib/kernels/parse.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/binder.cpp src/lib/kernels/keywords.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
src = 'src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/array.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/binder.cpp src/lib/kernels/parse.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/stream.cpp src/lib/kernels/keywords.cpp src/lib/value_scalar.cpp src/lib/wrap/clBLAS.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/command_queue.cpp src/lib/driver/handle.cpp src/lib/driver/buffer.cpp src/lib/driver/event.cpp src/lib/driver/context.cpp src/lib/driver/program_cache.cpp src/lib/driver/ndrange.cpp src/lib/driver/check.cpp src/lib/driver/backend.cpp src/lib/driver/platform.cpp src/lib/driver/dispatch.cpp src/lib/driver/kernel.cpp src/lib/driver/device.cpp src/lib/driver/program.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
boostsrc = 'external/boost/libs/'
for s in ['numpy','python','smart_ptr','system','thread']:
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]