API: clearer interface for transposition
This commit is contained in:
@@ -234,8 +234,8 @@ void bench(sc::numeric_type dtype, std::string operation)
|
||||
#ifdef HAS_A_BLAS
|
||||
int_t lda = A.ld();
|
||||
#endif
|
||||
BENCHMARK_ISAAC(y = sc::execution_handler(AT?dot(A.T(),x):dot(A,x), sc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t);
|
||||
BENCHMARK_ISAAC(y = sc::execution_handler(AT?dot(A.T(),x):dot(A,x), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)),(M*N + M + N)*dtsize/t);
|
||||
BENCHMARK_ISAAC(y = sc::execution_handler(AT?dot(A.T,x):dot(A,x), sc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t);
|
||||
BENCHMARK_ISAAC(y = sc::execution_handler(AT?dot(A.T,x):dot(A,x), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)),(M*N + M + N)*dtsize/t);
|
||||
#ifdef BENCH_CLBLAS
|
||||
if(y.context().backend()==sc::driver::OPENCL)
|
||||
BENCHMARK_CLBLAS(clblasSgemv(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, As1, As2, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(x.data()), 0, 1, 0, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue),0, NULL, &event), (M*N + M + N)*dtsize/t)
|
||||
@@ -317,8 +317,8 @@ void bench(sc::numeric_type dtype, std::string operation)
|
||||
#ifdef HAS_A_BLAS
|
||||
int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld();
|
||||
#endif
|
||||
BENCHMARK_ISAAC(C = sc::execution_handler(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(false)), (double)2*M*N*K/t);
|
||||
BENCHMARK_ISAAC(C = sc::execution_handler(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)), (double)2*M*N*K/t);
|
||||
BENCHMARK_ISAAC(C = sc::execution_handler(AT?(BT?dot(A.T,B.T):dot(A.T,B)):(BT?dot(A,B.T):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(false)), (double)2*M*N*K/t);
|
||||
BENCHMARK_ISAAC(C = sc::execution_handler(AT?(BT?dot(A.T,B.T):dot(A.T,B)):(BT?dot(A,B.T):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)), (double)2*M*N*K/t);
|
||||
/* clblas */
|
||||
#ifdef BENCH_CLBLAS
|
||||
if(C.context().backend()==sc::driver::OPENCL)
|
||||
|
@@ -91,7 +91,7 @@ public:
|
||||
array operator[](slice const &);
|
||||
array operator()(slice const &, slice const &);
|
||||
|
||||
math_expression T() const;
|
||||
|
||||
protected:
|
||||
numeric_type dtype_;
|
||||
|
||||
@@ -102,6 +102,9 @@ protected:
|
||||
|
||||
driver::Context context_;
|
||||
driver::Buffer data_;
|
||||
|
||||
public:
|
||||
math_expression T;
|
||||
};
|
||||
|
||||
class ISAACAPI scalar : public array
|
||||
|
@@ -24,22 +24,27 @@ namespace isaac
|
||||
|
||||
array::array(int_t shape0, numeric_type dtype, driver::Context const & context) :
|
||||
dtype_(dtype), shape_(shape0, 1, 1, 1), start_(0, 0, 0, 0), stride_(1, 1, 1, 1), ld_(shape_[0]),
|
||||
context_(context), data_(context_, size_of(dtype)*dsize())
|
||||
context_(context), data_(context_, size_of(dtype)*dsize()),
|
||||
T(isaac::trans(*this))
|
||||
{ }
|
||||
|
||||
array::array(int_t shape0, numeric_type dtype, driver::Buffer data, int_t start, int_t inc):
|
||||
dtype_(dtype), shape_(shape0), start_(start, 0, 0, 0), stride_(inc), ld_(shape_[0]), context_(data.context()), data_(data)
|
||||
dtype_(dtype), shape_(shape0), start_(start, 0, 0, 0), stride_(inc), ld_(shape_[0]), context_(data.context()), data_(data),
|
||||
T(isaac::trans(*this))
|
||||
{ }
|
||||
|
||||
|
||||
template<class DT>
|
||||
array::array(std::vector<DT> const & x, driver::Context const & context):
|
||||
dtype_(to_numeric_type<DT>::value), shape_((int_t)x.size(), 1), start_(0, 0, 0, 0), stride_(1, 1, 1, 1), ld_(shape_[0]),
|
||||
context_(context), data_(context, size_of(dtype_)*dsize())
|
||||
context_(context), data_(context, size_of(dtype_)*dsize()),
|
||||
T(isaac::trans(*this))
|
||||
{ *this = x; }
|
||||
|
||||
array::array(array & v, slice const & s0) : dtype_(v.dtype_), shape_(s0.size, 1, 1, 1), start_(v.start_[0] + v.stride_[0]*s0.start, 0, 0, 0), stride_(v.stride_[0]*s0.stride, 1, 1, 1),
|
||||
ld_(v.ld_), context_(v.context()), data_(v.data_)
|
||||
array::array(array & v, slice const & s0) :
|
||||
dtype_(v.dtype_), shape_(s0.size, 1, 1, 1), start_(v.start_[0] + v.stride_[0]*s0.start, 0, 0, 0), stride_(v.stride_[0]*s0.stride, 1, 1, 1),
|
||||
ld_(v.ld_), context_(v.context()), data_(v.data_),
|
||||
T(isaac::trans(*this))
|
||||
{}
|
||||
|
||||
#define INSTANTIATE(T) template ISAACAPI array::array(std::vector<T> const &, driver::Context const &)
|
||||
@@ -58,18 +63,24 @@ INSTANTIATE(double);
|
||||
#undef INSTANTIATE
|
||||
|
||||
// 2D
|
||||
array::array(int_t shape0, int_t shape1, numeric_type dtype, driver::Context const & context) : dtype_(dtype), shape_(shape0, shape1), start_(0, 0, 0, 0), stride_(1, 1, 1, 1), ld_(shape0),
|
||||
context_(context), data_(context_, size_of(dtype_)*dsize())
|
||||
array::array(int_t shape0, int_t shape1, numeric_type dtype, driver::Context const & context) :
|
||||
dtype_(dtype), shape_(shape0, shape1), start_(0, 0, 0, 0), stride_(1, 1, 1, 1), ld_(shape0),
|
||||
context_(context), data_(context_, size_of(dtype_)*dsize()),
|
||||
T(isaac::trans(*this))
|
||||
{}
|
||||
|
||||
array::array(int_t shape0, int_t shape1, numeric_type dtype, driver::Buffer data, int_t start, int_t ld) :
|
||||
dtype_(dtype), shape_(shape0, shape1), start_(start, 0, 0, 0), stride_(1, 1, 1, 1), ld_(ld), context_(data.context()), data_(data)
|
||||
dtype_(dtype), shape_(shape0, shape1), start_(start, 0, 0, 0), stride_(1, 1, 1, 1),
|
||||
ld_(ld), context_(data.context()), data_(data),
|
||||
T(isaac::trans(*this))
|
||||
{ }
|
||||
|
||||
array::array(array & M, slice const & s0, slice const & s1) : dtype_(M.dtype_), shape_(s0.size, s1.size, 1, 1),
|
||||
start_(M.start_[0] + M.stride_[0]*s0.start, M.start_[1] + M.stride_[1]*s1.start, 0, 0),
|
||||
stride_(M.stride_[0]*s0.stride, M.stride_[1]*s1.stride, 1, 1), ld_(M.ld_),
|
||||
context_(M.data_.context()), data_(M.data_)
|
||||
array::array(array & M, slice const & s0, slice const & s1) :
|
||||
dtype_(M.dtype_), shape_(s0.size, s1.size, 1, 1),
|
||||
start_(M.start_[0] + M.stride_[0]*s0.start, M.start_[1] + M.stride_[1]*s1.start, 0, 0),
|
||||
stride_(M.stride_[0]*s0.stride, M.stride_[1]*s1.stride, 1, 1), ld_(M.ld_),
|
||||
context_(M.data_.context()), data_(M.data_),
|
||||
T(isaac::trans(*this))
|
||||
{ }
|
||||
|
||||
|
||||
@@ -77,20 +88,24 @@ template<typename DT>
|
||||
array::array(int_t shape0, int_t shape1, std::vector<DT> const & data, driver::Context const & context)
|
||||
: dtype_(to_numeric_type<DT>::value),
|
||||
shape_(shape0, shape1), start_(0, 0), stride_(1, 1), ld_(shape0),
|
||||
context_(context), data_(context_, size_of(dtype_)*dsize())
|
||||
context_(context), data_(context_, size_of(dtype_)*dsize()),
|
||||
T(isaac::trans(*this))
|
||||
{
|
||||
isaac::copy(data, *this);
|
||||
}
|
||||
|
||||
// 3D
|
||||
array::array(int_t shape0, int_t shape1, int_t shape2, numeric_type dtype, driver::Context const & context) : dtype_(dtype), shape_(shape0, shape1, shape2, 1), start_(0, 0, 0, 0), stride_(1, 1, 1, 1), ld_(shape0),
|
||||
context_(context), data_(context_, size_of(dtype_)*dsize())
|
||||
array::array(int_t shape0, int_t shape1, int_t shape2, numeric_type dtype, driver::Context const & context) :
|
||||
dtype_(dtype), shape_(shape0, shape1, shape2, 1), start_(0, 0, 0, 0), stride_(1, 1, 1, 1), ld_(shape0),
|
||||
context_(context), data_(context_, size_of(dtype_)*dsize()),
|
||||
T(isaac::trans(*this))
|
||||
{}
|
||||
|
||||
//Slices
|
||||
array::array(numeric_type dtype, driver::Buffer data, slice const & s0, slice const & s1, int_t ld):
|
||||
dtype_(dtype), shape_(s0.size, s1.size), start_(s0.start, s1.start), stride_(s0.stride, s1.stride),
|
||||
ld_(ld), context_(data.context()), data_(data)
|
||||
ld_(ld), context_(data.context()), data_(data),
|
||||
T(isaac::trans(*this))
|
||||
{ }
|
||||
|
||||
|
||||
@@ -112,9 +127,11 @@ INSTANTIATE(double);
|
||||
|
||||
array::array(math_expression const & proxy) : array(execution_handler(proxy)){}
|
||||
|
||||
array::array(array const & other): dtype_(other.dtype()),
|
||||
array::array(array const & other):
|
||||
dtype_(other.dtype()),
|
||||
shape_(other.shape()), start_(0,0), stride_(1, 1), ld_(shape_[0]),
|
||||
context_(other.context()), data_(context_, size_of(dtype_)*dsize())
|
||||
context_(other.context()), data_(context_, size_of(dtype_)*dsize()),
|
||||
T(isaac::trans(*this))
|
||||
{
|
||||
*this = other;
|
||||
}
|
||||
@@ -122,7 +139,8 @@ array::array(array const & other): dtype_(other.dtype()),
|
||||
array::array(execution_handler const & other) :
|
||||
dtype_(other.x().dtype()),
|
||||
shape_(other.x().shape()), start_(0,0), stride_(1, 1), ld_(shape_[0]),
|
||||
context_(other.x().context()), data_(context_, size_of(dtype_)*dsize())
|
||||
context_(other.x().context()), data_(context_, size_of(dtype_)*dsize()),
|
||||
T(isaac::trans(*this))
|
||||
{
|
||||
*this = other;
|
||||
}
|
||||
@@ -266,9 +284,6 @@ array & array::operator/=(array const & rhs)
|
||||
array & array::operator/=(math_expression const & rhs)
|
||||
{ return *this = math_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), rhs.context(), dtype_, shape_); }
|
||||
|
||||
math_expression array::T() const
|
||||
{ return isaac::trans(*this) ;}
|
||||
|
||||
/*--- Indexing operators -----*/
|
||||
//---------------------------------------
|
||||
math_expression array::operator[](for_idx_t idx) const
|
||||
|
@@ -156,7 +156,7 @@ extern "C"
|
||||
\
|
||||
sc::driver::Context const & context = A.context();\
|
||||
if(transA==clblasTrans)\
|
||||
execute(sc::assign(y, alpha*dot(A.T(), x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
execute(sc::assign(y, alpha*dot(A.T, x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
else\
|
||||
execute(sc::assign(y, alpha*dot(A, x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
return clblasSuccess;\
|
||||
@@ -198,11 +198,11 @@ extern "C"
|
||||
sc::driver::Context const & context = C.context();\
|
||||
/*Operation*/\
|
||||
if((transA==clblasTrans) && (transB==clblasTrans))\
|
||||
execute(sc::assign(C, alpha*dot(A.T(), B.T()) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
execute(sc::assign(C, alpha*dot(A.T, B.T) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
else if((transA==clblasTrans) && (transB==clblasNoTrans))\
|
||||
execute(sc::assign(C, alpha*dot(A.T(), B) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
execute(sc::assign(C, alpha*dot(A.T, B) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
else if((transA==clblasNoTrans) && (transB==clblasTrans))\
|
||||
execute(sc::assign(C, alpha*dot(A, B.T()) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
execute(sc::assign(C, alpha*dot(A, B.T) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
else\
|
||||
execute(sc::assign(C, alpha*dot(A, B) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
return clblasSuccess;\
|
||||
|
@@ -73,7 +73,7 @@ def main():
|
||||
libraries += ['gnustl_shared']
|
||||
|
||||
#Source files
|
||||
src = 'src/lib/value_scalar.cpp src/lib/wrap/clBLAS.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/profiles/profiles.cpp src/lib/profiles/presets.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/handle.cpp src/lib/driver/dispatch.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/program_cache.cpp src/lib/driver/command_queue.cpp src/lib/driver/buffer.cpp src/lib/driver/backend.cpp src/lib/driver/platform.cpp src/lib/driver/program.cpp src/lib/driver/kernel.cpp src/lib/driver/device.cpp src/lib/driver/check.cpp src/lib/driver/context.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/io.cpp src/lib/array.cpp src/lib/kernels/parse.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/binder.cpp src/lib/kernels/keywords.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
src = 'src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/array.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/binder.cpp src/lib/kernels/parse.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/stream.cpp src/lib/kernels/keywords.cpp src/lib/value_scalar.cpp src/lib/wrap/clBLAS.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/command_queue.cpp src/lib/driver/handle.cpp src/lib/driver/buffer.cpp src/lib/driver/event.cpp src/lib/driver/context.cpp src/lib/driver/program_cache.cpp src/lib/driver/ndrange.cpp src/lib/driver/check.cpp src/lib/driver/backend.cpp src/lib/driver/platform.cpp src/lib/driver/dispatch.cpp src/lib/driver/kernel.cpp src/lib/driver/device.cpp src/lib/driver/program.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
boostsrc = 'external/boost/libs/'
|
||||
for s in ['numpy','python','smart_ptr','system','thread']:
|
||||
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
||||
|
Reference in New Issue
Block a user