cuBLAS: preparing field for proper compliant cublasHandle_t behavior
This commit is contained in:
@@ -24,229 +24,248 @@
|
||||
#include "cublas.h"
|
||||
|
||||
namespace sc = isaac;
|
||||
using sc::driver::Buffer;
|
||||
using sc::assign;
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
struct cublasContext
|
||||
{
|
||||
struct cublasContext
|
||||
{
|
||||
};
|
||||
|
||||
};
|
||||
static cublasHandle_t dft_handle = cublasHandle_t();
|
||||
|
||||
cublasStatus_t cublasCreate_v2 (cublasHandle_t *handle)
|
||||
{
|
||||
*handle = new cublasContext();
|
||||
return CUBLAS_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
cublasStatus_t cublasDestroy_v2 (cublasHandle_t handle)
|
||||
{
|
||||
delete handle;
|
||||
return cublasShutdown();
|
||||
}
|
||||
|
||||
cublasStatus cublasInit()
|
||||
{
|
||||
return CUBLAS_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
cublasStatus cublasShutdown()
|
||||
{
|
||||
isaac::runtime::profiles::release();
|
||||
isaac::driver::backend::release();
|
||||
return CUBLAS_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
//*****************
|
||||
//BLAS1
|
||||
//*****************
|
||||
|
||||
//AXPY
|
||||
#define MAKE_AXPY(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
void cublas ## TYPE_CHAR ## axpy (int n, TYPE_CU alpha, const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\
|
||||
{\
|
||||
sc::array dx((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x,false), 0, incx); \
|
||||
sc::array dy((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)y,false), 0, incy); \
|
||||
sc::runtime::execute(sc::assign(dy, alpha*dx + dy));\
|
||||
}\
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## axpy_v2 (cublasHandle_t, int n, const TYPE_CU *alpha,\
|
||||
const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\
|
||||
{\
|
||||
cublas ## TYPE_CHAR ## axpy(n, *alpha, x, incx, y, incy);\
|
||||
return CUBLAS_STATUS_SUCCESS;\
|
||||
}
|
||||
|
||||
MAKE_AXPY(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_AXPY(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
//COPY
|
||||
#define MAKE_COPY(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
void cublas ## TYPE_CHAR ## copy (int n, const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\
|
||||
{\
|
||||
sc::array dx((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x,false), 0, incx); \
|
||||
sc::array dy((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)y,false), 0, incy); \
|
||||
sc::runtime::execute(sc::assign(dy,dx));\
|
||||
}\
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## copy_v2 (cublasHandle_t, int n, const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\
|
||||
{\
|
||||
cublas ## TYPE_CHAR ## copy(n, x, incx, y, incy);\
|
||||
return CUBLAS_STATUS_SUCCESS;\
|
||||
}
|
||||
|
||||
MAKE_COPY(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_COPY(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
//SCAL
|
||||
#define MAKE_SCAL(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
void cublas ## TYPE_CHAR ## scal (int n, TYPE_CU alpha, TYPE_CU *x, int incx)\
|
||||
{\
|
||||
sc::array dx((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x,false), 0, incx); \
|
||||
sc::runtime::execute(sc::assign(dx,alpha*dx));\
|
||||
}\
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## scal_v2 (cublasHandle_t, int n, const TYPE_CU * alpha, TYPE_CU *x, int incx)\
|
||||
{\
|
||||
cublas ## TYPE_CHAR ## scal(n, *alpha, x, incx);\
|
||||
return CUBLAS_STATUS_SUCCESS;\
|
||||
}
|
||||
|
||||
MAKE_SCAL(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_SCAL(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
//DOT
|
||||
#define MAKE_DOT(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
TYPE_CU cublas ## TYPE_CHAR ## dot (int n, const TYPE_CU *x, int incx, const TYPE_CU *y, int incy)\
|
||||
{\
|
||||
sc::array dx((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x,false), 0, incx); \
|
||||
sc::array dy((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)y,false), 0, incy); \
|
||||
return sc::value_scalar(sc::dot(dx,dy));\
|
||||
}\
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## dot_v2 (cublasHandle_t, int n, const TYPE_CU *x, int incx, const TYPE_CU *y, int incy, TYPE_CU* result)\
|
||||
{\
|
||||
*result = cublas ## TYPE_CHAR ## dot(n, x, incx, y, incy);\
|
||||
return CUBLAS_STATUS_SUCCESS;\
|
||||
}
|
||||
|
||||
MAKE_DOT(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_DOT(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
//ASUM
|
||||
#define MAKE_ASUM(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
TYPE_CU cublas ## TYPE_CHAR ## asum (int n, const TYPE_CU *x, int incx)\
|
||||
{\
|
||||
sc::array dx((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x,false), 0, incx); \
|
||||
return sc::value_scalar(sum(abs(dx)));\
|
||||
}\
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## asum_v2 (cublasHandle_t, int n, const TYPE_CU *x, int incx, TYPE_CU* result)\
|
||||
{\
|
||||
*result = cublas ## TYPE_CHAR ## asum(n, x, incx);\
|
||||
return CUBLAS_STATUS_SUCCESS;\
|
||||
}
|
||||
|
||||
MAKE_ASUM(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_ASUM(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
//*****************
|
||||
//BLAS2
|
||||
//*****************
|
||||
|
||||
#define MAKE_GEMV(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
void cublas ## TYPE_CHAR ## gemv (char trans, int m, int n, TYPE_CU alpha,\
|
||||
const TYPE_CU *A, int lda, const TYPE_CU *x, int incx,\
|
||||
TYPE_CU beta, TYPE_CU *y, int incy)\
|
||||
{\
|
||||
sc::array dA((sc::int_t)m, (sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)A, false), 0, (sc::int_t)lda);\
|
||||
\
|
||||
sc::int_t sx = (sc::int_t)n, sy = (sc::int_t)m;\
|
||||
if(trans=='T') std::swap(sx, sy);\
|
||||
sc::array dx(sx, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x, false), 0, incx);\
|
||||
sc::array dy(sy, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)y, false), 0, incy);\
|
||||
\
|
||||
if(trans=='T')\
|
||||
sc::runtime::execute(sc::assign(dy, alpha*dot(dA.T, dx) + beta*dy));\
|
||||
else\
|
||||
sc::runtime::execute(sc::assign(dy, alpha*dot(dA, dx) + beta*dy));\
|
||||
}\
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## gemv_v2 (cublasHandle_t, cublasOperation_t trans, int m, int n, const TYPE_CU *alpha,\
|
||||
const TYPE_CU *A, int lda, const TYPE_CU *x, int incx, const TYPE_CU *beta, TYPE_CU *y, int incy)\
|
||||
{\
|
||||
if(trans==CUBLAS_OP_C)\
|
||||
return CUBLAS_STATUS_NOT_SUPPORTED;\
|
||||
cublas ## TYPE_CHAR ## gemv((trans==CUBLAS_OP_N)?'N':'T', m, n, *alpha, A, lda, x, incx, *beta, y, incy);\
|
||||
return CUBLAS_STATUS_SUCCESS;\
|
||||
}
|
||||
|
||||
MAKE_GEMV(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_GEMV(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
|
||||
#define MAKE_GER(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
void cublas ## TYPE_CHAR ## ger (int m, int n, TYPE_CU alpha, const TYPE_CU *x, int incx,\
|
||||
const TYPE_CU *y, int incy, TYPE_CU *A, int lda)\
|
||||
{\
|
||||
sc::array dx((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x,false), 0, incx); \
|
||||
sc::array dy((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)y,false), 0, incy); \
|
||||
sc::array dA((sc::int_t)m, (sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)A, false), 0, (sc::int_t)lda);\
|
||||
sc::runtime::execute(sc::assign(dA, alpha*sc::outer(dx, dy) + dA));\
|
||||
}\
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## ger_v2 (cublasHandle_t, int m, int n, const TYPE_CU * alpha, const TYPE_CU *x, int incx,\
|
||||
const TYPE_CU *y, int incy, TYPE_CU *A, int lda)\
|
||||
{\
|
||||
cublas ## TYPE_CHAR ## ger(m, n, *alpha, x, incx, y, incy, A, lda);\
|
||||
return CUBLAS_STATUS_SUCCESS;\
|
||||
}
|
||||
|
||||
MAKE_GER(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_GER(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
|
||||
//*****************
|
||||
//BLAS3
|
||||
//*****************
|
||||
|
||||
#define MAKE_GEMM(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
void cublas ## TYPE_CHAR ## gemm (char transa, char transb, int m, int n, int k,\
|
||||
TYPE_CU alpha, const TYPE_CU *A, int lda,\
|
||||
const TYPE_CU *B, int ldb, TYPE_CU beta, TYPE_CU *C,\
|
||||
int ldc)\
|
||||
{\
|
||||
if(k==1 && m>1 && n>1){\
|
||||
sc::array dA((sc::int_t)m, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)A, false), 0, transa=='N'?1:lda);\
|
||||
sc::array dB((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)B, false), 0, transb=='T'?1:ldb);\
|
||||
sc::array dC((sc::int_t)m, (sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)C, false), 0, (sc::int_t)ldc);\
|
||||
sc::runtime::execute(sc::assign(dC, alpha*sc::outer(dA, dB) + beta*dC));\
|
||||
return;\
|
||||
}\
|
||||
sc::int_t As1 = (sc::int_t)m, As2 = (sc::int_t)k;\
|
||||
sc::int_t Bs1 = (sc::int_t)k, Bs2 = (sc::int_t)n;\
|
||||
if(transa=='T') std::swap(As1, As2);\
|
||||
if(transb=='T') std::swap(Bs1, Bs2);\
|
||||
/*Struct*/\
|
||||
sc::array dA(As1, As2, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)A, false), 0, (sc::int_t)lda);\
|
||||
sc::array dB(Bs1, Bs2, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)B, false), 0, (sc::int_t)ldb);\
|
||||
sc::array dC((sc::int_t)m, (sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)C, false), 0, (sc::int_t)ldc);\
|
||||
/*Operation*/\
|
||||
if((transa=='T') && (transb=='T'))\
|
||||
sc::runtime::execute(sc::assign(dC, alpha*dot(dA.T, dB.T) + beta*dC));\
|
||||
else if((transa=='T') && (transb=='N'))\
|
||||
sc::runtime::execute(sc::assign(dC, alpha*dot(dA.T, dB) + beta*dC));\
|
||||
else if((transa=='N') && (transb=='T'))\
|
||||
sc::runtime::execute(sc::assign(dC, alpha*dot(dA, dB.T) + beta*dC));\
|
||||
else\
|
||||
sc::runtime::execute(sc::assign(dC, alpha*dot(dA, dB) + beta*dC));\
|
||||
}\
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## gemm_v2(cublasHandle_t, cublasOperation_t transa, cublasOperation_t transb,\
|
||||
int m, int n, int k, const TYPE_CU *alpha, const TYPE_CU *A,\
|
||||
int lda, const TYPE_CU *B, int ldb,const TYPE_CU *beta, TYPE_CU *C, int ldc)\
|
||||
{\
|
||||
if(transa==CUBLAS_OP_C || transb==CUBLAS_OP_C)\
|
||||
return CUBLAS_STATUS_NOT_SUPPORTED;\
|
||||
cublas ## TYPE_CHAR ## gemm((transa==CUBLAS_OP_N)?'N':'T', (transb==CUBLAS_OP_N)?'N':'T', m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc);\
|
||||
return CUBLAS_STATUS_SUCCESS;\
|
||||
}
|
||||
|
||||
MAKE_GEMM(S, sc::FLOAT_TYPE, cl_float)
|
||||
MAKE_GEMM(D, sc::DOUBLE_TYPE, cl_double)
|
||||
cublasStatus cublasInit()
|
||||
{
|
||||
return CUBLAS_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
cublasStatus cublasShutdown()
|
||||
{
|
||||
isaac::runtime::profiles::release();
|
||||
isaac::driver::backend::release();
|
||||
return CUBLAS_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
cublasStatus_t cublasCreate_v2 (cublasHandle_t *handle)
|
||||
{
|
||||
*handle = new cublasContext();
|
||||
return CUBLAS_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
cublasStatus_t cublasDestroy_v2 (cublasHandle_t handle)
|
||||
{
|
||||
delete handle;
|
||||
return cublasShutdown();
|
||||
}
|
||||
|
||||
static cublasStatus_t execute(cublasHandle_t, sc::expression_tree const & op)
|
||||
{
|
||||
sc::runtime::execute(op);
|
||||
return CUBLAS_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
inline cublasOperation_t cvt_trans(char c)
|
||||
{
|
||||
if(c=='n' || c=='N') return CUBLAS_OP_N;
|
||||
if(c=='t' || c=='T') return CUBLAS_OP_T;
|
||||
return CUBLAS_OP_C;
|
||||
}
|
||||
|
||||
|
||||
//*****************
|
||||
//BLAS1
|
||||
//*****************
|
||||
|
||||
//AXPY
|
||||
#define MAKE_AXPY(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## axpy_v2 (cublasHandle_t handle, int n, const TYPE_CU *alpha,\
|
||||
const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\
|
||||
{\
|
||||
sc::array dx(n, TYPE_ISAAC, Buffer((CUdeviceptr)x,false), 0, incx); \
|
||||
sc::array dy(n, TYPE_ISAAC, Buffer((CUdeviceptr)y,false), 0, incy); \
|
||||
return execute(handle, assign(dy, *alpha*dx + dy));\
|
||||
}\
|
||||
\
|
||||
void cublas ## TYPE_CHAR ## axpy (int n, TYPE_CU alpha, const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\
|
||||
{ cublas ## TYPE_CHAR ## axpy_v2(dft_handle, n, &alpha, x, incx, y, incy); }
|
||||
|
||||
MAKE_AXPY(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_AXPY(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
//COPY
|
||||
#define MAKE_COPY(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## copy_v2 (cublasHandle_t handle, int n, const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\
|
||||
{\
|
||||
sc::array dx(n, TYPE_ISAAC, Buffer((CUdeviceptr)x,false), 0, incx); \
|
||||
sc::array dy(n, TYPE_ISAAC, Buffer((CUdeviceptr)y,false), 0, incy); \
|
||||
return execute(handle, assign(dy,dx));\
|
||||
}\
|
||||
\
|
||||
void cublas ## TYPE_CHAR ## copy (int n, const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\
|
||||
{ cublas ## TYPE_CHAR ## copy_v2(dft_handle, n, x, incx, y, incy); }
|
||||
|
||||
MAKE_COPY(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_COPY(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
//SCAL
|
||||
#define MAKE_SCAL(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## scal_v2 (cublasHandle_t handle, int n, const TYPE_CU * alpha, TYPE_CU *x, int incx)\
|
||||
{\
|
||||
sc::array dx(n, TYPE_ISAAC, Buffer((CUdeviceptr)x,false), 0, incx); \
|
||||
return execute(handle, assign(dx,*alpha*dx));\
|
||||
}\
|
||||
\
|
||||
void cublas ## TYPE_CHAR ## scal (int n, TYPE_CU alpha, TYPE_CU *x, int incx)\
|
||||
{ cublas ## TYPE_CHAR ## scal_v2(dft_handle, n, &alpha, x, incx); }\
|
||||
|
||||
MAKE_SCAL(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_SCAL(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
//DOT
|
||||
#define MAKE_DOT(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## dot_v2 (cublasHandle_t handle, int n, const TYPE_CU *x, int incx, const TYPE_CU *y, int incy, TYPE_CU* result)\
|
||||
{\
|
||||
sc::array dx(n, TYPE_ISAAC, Buffer((CUdeviceptr)x,false), 0, incx); \
|
||||
sc::array dy(n, TYPE_ISAAC, Buffer((CUdeviceptr)y,false), 0, incy); \
|
||||
sc::scalar scr(TYPE_ISAAC);\
|
||||
cublasStatus_t status = execute(handle, assign(scr, sc::dot(dx,dy)));\
|
||||
*result = scr;\
|
||||
return status;\
|
||||
}\
|
||||
\
|
||||
TYPE_CU cublas ## TYPE_CHAR ## dot (int n, const TYPE_CU *x, int incx, const TYPE_CU *y, int incy)\
|
||||
{\
|
||||
TYPE_CU result;\
|
||||
cublas ## TYPE_CHAR ## dot_v2(dft_handle, n, x, incx, y, incy, &result);\
|
||||
return result;\
|
||||
}\
|
||||
|
||||
MAKE_DOT(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_DOT(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
//ASUM
|
||||
#define MAKE_ASUM(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## asum_v2 (cublasHandle_t handle, int n, const TYPE_CU *x, int incx, TYPE_CU* result)\
|
||||
{\
|
||||
sc::array dx(n, TYPE_ISAAC, Buffer((CUdeviceptr)x,false), 0, incx); \
|
||||
sc::scalar scr(TYPE_ISAAC);\
|
||||
cublasStatus_t status = execute(handle, assign(scr, sum(abs(dx))));\
|
||||
*result = scr;\
|
||||
return status;\
|
||||
}\
|
||||
\
|
||||
TYPE_CU cublas ## TYPE_CHAR ## asum (int n, const TYPE_CU *x, int incx)\
|
||||
{\
|
||||
TYPE_CU result;\
|
||||
cublas ## TYPE_CHAR ## asum_v2(dft_handle, n, x, incx, &result);\
|
||||
return result;\
|
||||
}\
|
||||
|
||||
MAKE_ASUM(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_ASUM(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
//*****************
|
||||
//BLAS2
|
||||
//*****************
|
||||
|
||||
#define MAKE_GEMV(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## gemv_v2 (cublasHandle_t handle, cublasOperation_t trans, int m, int n, const TYPE_CU *alpha,\
|
||||
const TYPE_CU *A, int lda, const TYPE_CU *x, int incx, const TYPE_CU *beta, TYPE_CU *y, int incy)\
|
||||
{\
|
||||
if(trans==CUBLAS_OP_C)\
|
||||
return CUBLAS_STATUS_NOT_SUPPORTED;\
|
||||
bool AT = trans==CUBLAS_OP_T;\
|
||||
sc::array dA(m, n, TYPE_ISAAC, Buffer((CUdeviceptr)A, false), 0, lda);\
|
||||
sc::int_t sx = n;\
|
||||
sc::int_t sy = m;\
|
||||
if(AT)\
|
||||
std::swap(sx, sy);\
|
||||
sc::array dx(sx, TYPE_ISAAC, Buffer((CUdeviceptr)x, false), 0, incx);\
|
||||
sc::array dy(sy, TYPE_ISAAC, Buffer((CUdeviceptr)y, false), 0, incy);\
|
||||
\
|
||||
if(AT)\
|
||||
return execute(handle, assign(dy, *alpha*dot(dA.T, dx) + *beta*dy));\
|
||||
else\
|
||||
return execute(handle, assign(dy, *alpha*dot(dA, dx) + *beta*dy));\
|
||||
}\
|
||||
\
|
||||
void cublas ## TYPE_CHAR ## gemv (char trans, int m, int n, TYPE_CU alpha,\
|
||||
const TYPE_CU *A, int lda, const TYPE_CU *x, int incx,\
|
||||
TYPE_CU beta, TYPE_CU *y, int incy)\
|
||||
{ cublas ## TYPE_CHAR ## gemv_v2(dft_handle, cvt_trans(trans), m, n, &alpha, A, lda, x, incx, &beta, y, incy); }
|
||||
|
||||
MAKE_GEMV(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_GEMV(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
|
||||
#define MAKE_GER(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## ger_v2 (cublasHandle_t handle, int m, int n, const TYPE_CU * alpha, const TYPE_CU *x, int incx,\
|
||||
const TYPE_CU *y, int incy, TYPE_CU *A, int lda)\
|
||||
{\
|
||||
sc::array dx(n, TYPE_ISAAC, Buffer((CUdeviceptr)x,false), 0, incx); \
|
||||
sc::array dy(n, TYPE_ISAAC, Buffer((CUdeviceptr)y,false), 0, incy); \
|
||||
sc::array dA(m, n, TYPE_ISAAC, Buffer((CUdeviceptr)A, false), 0, lda);\
|
||||
return execute(handle, assign(dA, *alpha*outer(dx, dy) + dA));\
|
||||
}\
|
||||
\
|
||||
void cublas ## TYPE_CHAR ## ger (int m, int n, TYPE_CU alpha, const TYPE_CU *x, int incx,\
|
||||
const TYPE_CU *y, int incy, TYPE_CU *A, int lda)\
|
||||
{ cublas ## TYPE_CHAR ## ger_v2(dft_handle, m, n, &alpha, x, incx, y, incy, A, lda); }\
|
||||
|
||||
MAKE_GER(S, sc::FLOAT_TYPE, float)
|
||||
MAKE_GER(D, sc::DOUBLE_TYPE, double)
|
||||
|
||||
|
||||
//*****************
|
||||
//BLAS3
|
||||
//*****************
|
||||
|
||||
#define MAKE_GEMM(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
|
||||
cublasStatus_t cublas ## TYPE_CHAR ## gemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,\
|
||||
int m, int n, int k, const TYPE_CU *alpha, const TYPE_CU *A,\
|
||||
int lda, const TYPE_CU *B, int ldb,const TYPE_CU *beta, TYPE_CU *C, int ldc)\
|
||||
{\
|
||||
if(transa==CUBLAS_OP_C || transb==CUBLAS_OP_C)\
|
||||
return CUBLAS_STATUS_NOT_SUPPORTED;\
|
||||
bool AT = transa==CUBLAS_OP_T;\
|
||||
bool BT = transb==CUBLAS_OP_T;\
|
||||
TYPE_CU a = *alpha;\
|
||||
TYPE_CU b = *beta;\
|
||||
if(k==1 && m>1 && n>1){\
|
||||
sc::array dA(m, TYPE_ISAAC, Buffer((CUdeviceptr)A, false), 0, AT?lda:1);\
|
||||
sc::array dB(n, TYPE_ISAAC, Buffer((CUdeviceptr)B, false), 0, BT?1:ldb);\
|
||||
sc::array dC(m, n, TYPE_ISAAC, Buffer((CUdeviceptr)C, false), 0, ldc);\
|
||||
return execute(handle, assign(dC, a*sc::outer(dA, dB) + b*dC));\
|
||||
}\
|
||||
sc::int_t As1 = m, As2 = k;\
|
||||
sc::int_t Bs1 = k, Bs2 = n;\
|
||||
if(AT)\
|
||||
std::swap(As1, As2);\
|
||||
if(BT)\
|
||||
std::swap(Bs1, Bs2);\
|
||||
/*Struct*/\
|
||||
sc::array dA(As1, As2, TYPE_ISAAC, Buffer((CUdeviceptr)A, false), 0, lda);\
|
||||
sc::array dB(Bs1, Bs2, TYPE_ISAAC, Buffer((CUdeviceptr)B, false), 0, ldb);\
|
||||
sc::array dC(m, n, TYPE_ISAAC, Buffer((CUdeviceptr)C, false), 0, ldc);\
|
||||
/*Operation*/\
|
||||
if(AT && BT)\
|
||||
return execute(handle, assign(dC, a*dot(dA.T, dB.T) + b*dC));\
|
||||
else if(AT && !BT)\
|
||||
return execute(handle, assign(dC, a*dot(dA.T, dB) + b*dC));\
|
||||
else if(!AT && BT)\
|
||||
return execute(handle, assign(dC, a*dot(dA, dB.T) + b*dC));\
|
||||
else\
|
||||
return execute(handle, assign(dC, a*dot(dA, dB) + b*dC));\
|
||||
}\
|
||||
\
|
||||
void cublas ## TYPE_CHAR ## gemm (char transa, char transb, int m, int n, int k,\
|
||||
TYPE_CU alpha, const TYPE_CU *A, int lda,\
|
||||
const TYPE_CU *B, int ldb, TYPE_CU beta, TYPE_CU *C,\
|
||||
int ldc)\
|
||||
{ cublas ## TYPE_CHAR ## gemm_v2(dft_handle, cvt_trans(transa), cvt_trans(transb), m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc); }\
|
||||
|
||||
MAKE_GEMM(S, sc::FLOAT_TYPE, cl_float)
|
||||
MAKE_GEMM(D, sc::DOUBLE_TYPE, cl_double)
|
||||
}
|
||||
|
Reference in New Issue
Block a user