cuBLAS: preparing field for proper compliant cublasHandle_t behavior

This commit is contained in:
Philippe Tillet
2016-10-09 20:38:13 -04:00
parent d858b01313
commit faa06d32b9
2 changed files with 239 additions and 220 deletions

View File

@@ -24,229 +24,248 @@
#include "cublas.h" #include "cublas.h"
namespace sc = isaac; namespace sc = isaac;
using sc::driver::Buffer;
using sc::assign;
extern "C" extern "C"
{ {
struct cublasContext struct cublasContext
{ {
};
}; static cublasHandle_t dft_handle = cublasHandle_t();
cublasStatus_t cublasCreate_v2 (cublasHandle_t *handle) cublasStatus cublasInit()
{ {
*handle = new cublasContext(); return CUBLAS_STATUS_SUCCESS;
return CUBLAS_STATUS_SUCCESS; }
}
cublasStatus cublasShutdown()
cublasStatus_t cublasDestroy_v2 (cublasHandle_t handle) {
{ isaac::runtime::profiles::release();
delete handle; isaac::driver::backend::release();
return cublasShutdown(); return CUBLAS_STATUS_SUCCESS;
} }
cublasStatus cublasInit() cublasStatus_t cublasCreate_v2 (cublasHandle_t *handle)
{ {
return CUBLAS_STATUS_SUCCESS; *handle = new cublasContext();
} return CUBLAS_STATUS_SUCCESS;
}
cublasStatus cublasShutdown()
{ cublasStatus_t cublasDestroy_v2 (cublasHandle_t handle)
isaac::runtime::profiles::release(); {
isaac::driver::backend::release(); delete handle;
return CUBLAS_STATUS_SUCCESS; return cublasShutdown();
} }
static cublasStatus_t execute(cublasHandle_t, sc::expression_tree const & op)
{
sc::runtime::execute(op);
//***************** return CUBLAS_STATUS_SUCCESS;
//BLAS1 }
//*****************
inline cublasOperation_t cvt_trans(char c)
//AXPY {
#define MAKE_AXPY(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \ if(c=='n' || c=='N') return CUBLAS_OP_N;
void cublas ## TYPE_CHAR ## axpy (int n, TYPE_CU alpha, const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\ if(c=='t' || c=='T') return CUBLAS_OP_T;
{\ return CUBLAS_OP_C;
sc::array dx((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x,false), 0, incx); \ }
sc::array dy((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)y,false), 0, incy); \
sc::runtime::execute(sc::assign(dy, alpha*dx + dy));\
}\ //*****************
cublasStatus_t cublas ## TYPE_CHAR ## axpy_v2 (cublasHandle_t, int n, const TYPE_CU *alpha,\ //BLAS1
const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\ //*****************
{\
cublas ## TYPE_CHAR ## axpy(n, *alpha, x, incx, y, incy);\ //AXPY
return CUBLAS_STATUS_SUCCESS;\ #define MAKE_AXPY(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
} cublasStatus_t cublas ## TYPE_CHAR ## axpy_v2 (cublasHandle_t handle, int n, const TYPE_CU *alpha,\
const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\
MAKE_AXPY(S, sc::FLOAT_TYPE, float) {\
MAKE_AXPY(D, sc::DOUBLE_TYPE, double) sc::array dx(n, TYPE_ISAAC, Buffer((CUdeviceptr)x,false), 0, incx); \
sc::array dy(n, TYPE_ISAAC, Buffer((CUdeviceptr)y,false), 0, incy); \
//COPY return execute(handle, assign(dy, *alpha*dx + dy));\
#define MAKE_COPY(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \ }\
void cublas ## TYPE_CHAR ## copy (int n, const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\ \
{\ void cublas ## TYPE_CHAR ## axpy (int n, TYPE_CU alpha, const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\
sc::array dx((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x,false), 0, incx); \ { cublas ## TYPE_CHAR ## axpy_v2(dft_handle, n, &alpha, x, incx, y, incy); }
sc::array dy((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)y,false), 0, incy); \
sc::runtime::execute(sc::assign(dy,dx));\ MAKE_AXPY(S, sc::FLOAT_TYPE, float)
}\ MAKE_AXPY(D, sc::DOUBLE_TYPE, double)
cublasStatus_t cublas ## TYPE_CHAR ## copy_v2 (cublasHandle_t, int n, const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\
{\ //COPY
cublas ## TYPE_CHAR ## copy(n, x, incx, y, incy);\ #define MAKE_COPY(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
return CUBLAS_STATUS_SUCCESS;\ cublasStatus_t cublas ## TYPE_CHAR ## copy_v2 (cublasHandle_t handle, int n, const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\
} {\
sc::array dx(n, TYPE_ISAAC, Buffer((CUdeviceptr)x,false), 0, incx); \
MAKE_COPY(S, sc::FLOAT_TYPE, float) sc::array dy(n, TYPE_ISAAC, Buffer((CUdeviceptr)y,false), 0, incy); \
MAKE_COPY(D, sc::DOUBLE_TYPE, double) return execute(handle, assign(dy,dx));\
}\
//SCAL \
#define MAKE_SCAL(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \ void cublas ## TYPE_CHAR ## copy (int n, const TYPE_CU *x, int incx, TYPE_CU *y, int incy)\
void cublas ## TYPE_CHAR ## scal (int n, TYPE_CU alpha, TYPE_CU *x, int incx)\ { cublas ## TYPE_CHAR ## copy_v2(dft_handle, n, x, incx, y, incy); }
{\
sc::array dx((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x,false), 0, incx); \ MAKE_COPY(S, sc::FLOAT_TYPE, float)
sc::runtime::execute(sc::assign(dx,alpha*dx));\ MAKE_COPY(D, sc::DOUBLE_TYPE, double)
}\
cublasStatus_t cublas ## TYPE_CHAR ## scal_v2 (cublasHandle_t, int n, const TYPE_CU * alpha, TYPE_CU *x, int incx)\ //SCAL
{\ #define MAKE_SCAL(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
cublas ## TYPE_CHAR ## scal(n, *alpha, x, incx);\ cublasStatus_t cublas ## TYPE_CHAR ## scal_v2 (cublasHandle_t handle, int n, const TYPE_CU * alpha, TYPE_CU *x, int incx)\
return CUBLAS_STATUS_SUCCESS;\ {\
} sc::array dx(n, TYPE_ISAAC, Buffer((CUdeviceptr)x,false), 0, incx); \
return execute(handle, assign(dx,*alpha*dx));\
MAKE_SCAL(S, sc::FLOAT_TYPE, float) }\
MAKE_SCAL(D, sc::DOUBLE_TYPE, double) \
void cublas ## TYPE_CHAR ## scal (int n, TYPE_CU alpha, TYPE_CU *x, int incx)\
//DOT { cublas ## TYPE_CHAR ## scal_v2(dft_handle, n, &alpha, x, incx); }\
#define MAKE_DOT(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
TYPE_CU cublas ## TYPE_CHAR ## dot (int n, const TYPE_CU *x, int incx, const TYPE_CU *y, int incy)\ MAKE_SCAL(S, sc::FLOAT_TYPE, float)
{\ MAKE_SCAL(D, sc::DOUBLE_TYPE, double)
sc::array dx((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x,false), 0, incx); \
sc::array dy((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)y,false), 0, incy); \ //DOT
return sc::value_scalar(sc::dot(dx,dy));\ #define MAKE_DOT(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
}\ cublasStatus_t cublas ## TYPE_CHAR ## dot_v2 (cublasHandle_t handle, int n, const TYPE_CU *x, int incx, const TYPE_CU *y, int incy, TYPE_CU* result)\
cublasStatus_t cublas ## TYPE_CHAR ## dot_v2 (cublasHandle_t, int n, const TYPE_CU *x, int incx, const TYPE_CU *y, int incy, TYPE_CU* result)\ {\
{\ sc::array dx(n, TYPE_ISAAC, Buffer((CUdeviceptr)x,false), 0, incx); \
*result = cublas ## TYPE_CHAR ## dot(n, x, incx, y, incy);\ sc::array dy(n, TYPE_ISAAC, Buffer((CUdeviceptr)y,false), 0, incy); \
return CUBLAS_STATUS_SUCCESS;\ sc::scalar scr(TYPE_ISAAC);\
} cublasStatus_t status = execute(handle, assign(scr, sc::dot(dx,dy)));\
*result = scr;\
MAKE_DOT(S, sc::FLOAT_TYPE, float) return status;\
MAKE_DOT(D, sc::DOUBLE_TYPE, double) }\
\
//ASUM TYPE_CU cublas ## TYPE_CHAR ## dot (int n, const TYPE_CU *x, int incx, const TYPE_CU *y, int incy)\
#define MAKE_ASUM(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \ {\
TYPE_CU cublas ## TYPE_CHAR ## asum (int n, const TYPE_CU *x, int incx)\ TYPE_CU result;\
{\ cublas ## TYPE_CHAR ## dot_v2(dft_handle, n, x, incx, y, incy, &result);\
sc::array dx((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x,false), 0, incx); \ return result;\
return sc::value_scalar(sum(abs(dx)));\ }\
}\
cublasStatus_t cublas ## TYPE_CHAR ## asum_v2 (cublasHandle_t, int n, const TYPE_CU *x, int incx, TYPE_CU* result)\ MAKE_DOT(S, sc::FLOAT_TYPE, float)
{\ MAKE_DOT(D, sc::DOUBLE_TYPE, double)
*result = cublas ## TYPE_CHAR ## asum(n, x, incx);\
return CUBLAS_STATUS_SUCCESS;\ //ASUM
} #define MAKE_ASUM(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
cublasStatus_t cublas ## TYPE_CHAR ## asum_v2 (cublasHandle_t handle, int n, const TYPE_CU *x, int incx, TYPE_CU* result)\
MAKE_ASUM(S, sc::FLOAT_TYPE, float) {\
MAKE_ASUM(D, sc::DOUBLE_TYPE, double) sc::array dx(n, TYPE_ISAAC, Buffer((CUdeviceptr)x,false), 0, incx); \
sc::scalar scr(TYPE_ISAAC);\
//***************** cublasStatus_t status = execute(handle, assign(scr, sum(abs(dx))));\
//BLAS2 *result = scr;\
//***************** return status;\
}\
#define MAKE_GEMV(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \ \
void cublas ## TYPE_CHAR ## gemv (char trans, int m, int n, TYPE_CU alpha,\ TYPE_CU cublas ## TYPE_CHAR ## asum (int n, const TYPE_CU *x, int incx)\
const TYPE_CU *A, int lda, const TYPE_CU *x, int incx,\ {\
TYPE_CU beta, TYPE_CU *y, int incy)\ TYPE_CU result;\
{\ cublas ## TYPE_CHAR ## asum_v2(dft_handle, n, x, incx, &result);\
sc::array dA((sc::int_t)m, (sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)A, false), 0, (sc::int_t)lda);\ return result;\
\ }\
sc::int_t sx = (sc::int_t)n, sy = (sc::int_t)m;\
if(trans=='T') std::swap(sx, sy);\ MAKE_ASUM(S, sc::FLOAT_TYPE, float)
sc::array dx(sx, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x, false), 0, incx);\ MAKE_ASUM(D, sc::DOUBLE_TYPE, double)
sc::array dy(sy, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)y, false), 0, incy);\
\ //*****************
if(trans=='T')\ //BLAS2
sc::runtime::execute(sc::assign(dy, alpha*dot(dA.T, dx) + beta*dy));\ //*****************
else\
sc::runtime::execute(sc::assign(dy, alpha*dot(dA, dx) + beta*dy));\ #define MAKE_GEMV(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
}\ cublasStatus_t cublas ## TYPE_CHAR ## gemv_v2 (cublasHandle_t handle, cublasOperation_t trans, int m, int n, const TYPE_CU *alpha,\
cublasStatus_t cublas ## TYPE_CHAR ## gemv_v2 (cublasHandle_t, cublasOperation_t trans, int m, int n, const TYPE_CU *alpha,\ const TYPE_CU *A, int lda, const TYPE_CU *x, int incx, const TYPE_CU *beta, TYPE_CU *y, int incy)\
const TYPE_CU *A, int lda, const TYPE_CU *x, int incx, const TYPE_CU *beta, TYPE_CU *y, int incy)\ {\
{\ if(trans==CUBLAS_OP_C)\
if(trans==CUBLAS_OP_C)\ return CUBLAS_STATUS_NOT_SUPPORTED;\
return CUBLAS_STATUS_NOT_SUPPORTED;\ bool AT = trans==CUBLAS_OP_T;\
cublas ## TYPE_CHAR ## gemv((trans==CUBLAS_OP_N)?'N':'T', m, n, *alpha, A, lda, x, incx, *beta, y, incy);\ sc::array dA(m, n, TYPE_ISAAC, Buffer((CUdeviceptr)A, false), 0, lda);\
return CUBLAS_STATUS_SUCCESS;\ sc::int_t sx = n;\
} sc::int_t sy = m;\
if(AT)\
MAKE_GEMV(S, sc::FLOAT_TYPE, float) std::swap(sx, sy);\
MAKE_GEMV(D, sc::DOUBLE_TYPE, double) sc::array dx(sx, TYPE_ISAAC, Buffer((CUdeviceptr)x, false), 0, incx);\
sc::array dy(sy, TYPE_ISAAC, Buffer((CUdeviceptr)y, false), 0, incy);\
\
#define MAKE_GER(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \ if(AT)\
void cublas ## TYPE_CHAR ## ger (int m, int n, TYPE_CU alpha, const TYPE_CU *x, int incx,\ return execute(handle, assign(dy, *alpha*dot(dA.T, dx) + *beta*dy));\
const TYPE_CU *y, int incy, TYPE_CU *A, int lda)\ else\
{\ return execute(handle, assign(dy, *alpha*dot(dA, dx) + *beta*dy));\
sc::array dx((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)x,false), 0, incx); \ }\
sc::array dy((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)y,false), 0, incy); \ \
sc::array dA((sc::int_t)m, (sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)A, false), 0, (sc::int_t)lda);\ void cublas ## TYPE_CHAR ## gemv (char trans, int m, int n, TYPE_CU alpha,\
sc::runtime::execute(sc::assign(dA, alpha*sc::outer(dx, dy) + dA));\ const TYPE_CU *A, int lda, const TYPE_CU *x, int incx,\
}\ TYPE_CU beta, TYPE_CU *y, int incy)\
cublasStatus_t cublas ## TYPE_CHAR ## ger_v2 (cublasHandle_t, int m, int n, const TYPE_CU * alpha, const TYPE_CU *x, int incx,\ { cublas ## TYPE_CHAR ## gemv_v2(dft_handle, cvt_trans(trans), m, n, &alpha, A, lda, x, incx, &beta, y, incy); }
const TYPE_CU *y, int incy, TYPE_CU *A, int lda)\
{\ MAKE_GEMV(S, sc::FLOAT_TYPE, float)
cublas ## TYPE_CHAR ## ger(m, n, *alpha, x, incx, y, incy, A, lda);\ MAKE_GEMV(D, sc::DOUBLE_TYPE, double)
return CUBLAS_STATUS_SUCCESS;\
}
#define MAKE_GER(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
MAKE_GER(S, sc::FLOAT_TYPE, float) cublasStatus_t cublas ## TYPE_CHAR ## ger_v2 (cublasHandle_t handle, int m, int n, const TYPE_CU * alpha, const TYPE_CU *x, int incx,\
MAKE_GER(D, sc::DOUBLE_TYPE, double) const TYPE_CU *y, int incy, TYPE_CU *A, int lda)\
{\
sc::array dx(n, TYPE_ISAAC, Buffer((CUdeviceptr)x,false), 0, incx); \
//***************** sc::array dy(n, TYPE_ISAAC, Buffer((CUdeviceptr)y,false), 0, incy); \
//BLAS3 sc::array dA(m, n, TYPE_ISAAC, Buffer((CUdeviceptr)A, false), 0, lda);\
//***************** return execute(handle, assign(dA, *alpha*outer(dx, dy) + dA));\
}\
#define MAKE_GEMM(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \ \
void cublas ## TYPE_CHAR ## gemm (char transa, char transb, int m, int n, int k,\ void cublas ## TYPE_CHAR ## ger (int m, int n, TYPE_CU alpha, const TYPE_CU *x, int incx,\
TYPE_CU alpha, const TYPE_CU *A, int lda,\ const TYPE_CU *y, int incy, TYPE_CU *A, int lda)\
const TYPE_CU *B, int ldb, TYPE_CU beta, TYPE_CU *C,\ { cublas ## TYPE_CHAR ## ger_v2(dft_handle, m, n, &alpha, x, incx, y, incy, A, lda); }\
int ldc)\
{\ MAKE_GER(S, sc::FLOAT_TYPE, float)
if(k==1 && m>1 && n>1){\ MAKE_GER(D, sc::DOUBLE_TYPE, double)
sc::array dA((sc::int_t)m, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)A, false), 0, transa=='N'?1:lda);\
sc::array dB((sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)B, false), 0, transb=='T'?1:ldb);\
sc::array dC((sc::int_t)m, (sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)C, false), 0, (sc::int_t)ldc);\ //*****************
sc::runtime::execute(sc::assign(dC, alpha*sc::outer(dA, dB) + beta*dC));\ //BLAS3
return;\ //*****************
}\
sc::int_t As1 = (sc::int_t)m, As2 = (sc::int_t)k;\ #define MAKE_GEMM(TYPE_CHAR, TYPE_ISAAC, TYPE_CU) \
sc::int_t Bs1 = (sc::int_t)k, Bs2 = (sc::int_t)n;\ cublasStatus_t cublas ## TYPE_CHAR ## gemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,\
if(transa=='T') std::swap(As1, As2);\ int m, int n, int k, const TYPE_CU *alpha, const TYPE_CU *A,\
if(transb=='T') std::swap(Bs1, Bs2);\ int lda, const TYPE_CU *B, int ldb,const TYPE_CU *beta, TYPE_CU *C, int ldc)\
/*Struct*/\ {\
sc::array dA(As1, As2, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)A, false), 0, (sc::int_t)lda);\ if(transa==CUBLAS_OP_C || transb==CUBLAS_OP_C)\
sc::array dB(Bs1, Bs2, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)B, false), 0, (sc::int_t)ldb);\ return CUBLAS_STATUS_NOT_SUPPORTED;\
sc::array dC((sc::int_t)m, (sc::int_t)n, TYPE_ISAAC, sc::driver::Buffer((CUdeviceptr)C, false), 0, (sc::int_t)ldc);\ bool AT = transa==CUBLAS_OP_T;\
/*Operation*/\ bool BT = transb==CUBLAS_OP_T;\
if((transa=='T') && (transb=='T'))\ TYPE_CU a = *alpha;\
sc::runtime::execute(sc::assign(dC, alpha*dot(dA.T, dB.T) + beta*dC));\ TYPE_CU b = *beta;\
else if((transa=='T') && (transb=='N'))\ if(k==1 && m>1 && n>1){\
sc::runtime::execute(sc::assign(dC, alpha*dot(dA.T, dB) + beta*dC));\ sc::array dA(m, TYPE_ISAAC, Buffer((CUdeviceptr)A, false), 0, AT?lda:1);\
else if((transa=='N') && (transb=='T'))\ sc::array dB(n, TYPE_ISAAC, Buffer((CUdeviceptr)B, false), 0, BT?1:ldb);\
sc::runtime::execute(sc::assign(dC, alpha*dot(dA, dB.T) + beta*dC));\ sc::array dC(m, n, TYPE_ISAAC, Buffer((CUdeviceptr)C, false), 0, ldc);\
else\ return execute(handle, assign(dC, a*sc::outer(dA, dB) + b*dC));\
sc::runtime::execute(sc::assign(dC, alpha*dot(dA, dB) + beta*dC));\ }\
}\ sc::int_t As1 = m, As2 = k;\
cublasStatus_t cublas ## TYPE_CHAR ## gemm_v2(cublasHandle_t, cublasOperation_t transa, cublasOperation_t transb,\ sc::int_t Bs1 = k, Bs2 = n;\
int m, int n, int k, const TYPE_CU *alpha, const TYPE_CU *A,\ if(AT)\
int lda, const TYPE_CU *B, int ldb,const TYPE_CU *beta, TYPE_CU *C, int ldc)\ std::swap(As1, As2);\
{\ if(BT)\
if(transa==CUBLAS_OP_C || transb==CUBLAS_OP_C)\ std::swap(Bs1, Bs2);\
return CUBLAS_STATUS_NOT_SUPPORTED;\ /*Struct*/\
cublas ## TYPE_CHAR ## gemm((transa==CUBLAS_OP_N)?'N':'T', (transb==CUBLAS_OP_N)?'N':'T', m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc);\ sc::array dA(As1, As2, TYPE_ISAAC, Buffer((CUdeviceptr)A, false), 0, lda);\
return CUBLAS_STATUS_SUCCESS;\ sc::array dB(Bs1, Bs2, TYPE_ISAAC, Buffer((CUdeviceptr)B, false), 0, ldb);\
} sc::array dC(m, n, TYPE_ISAAC, Buffer((CUdeviceptr)C, false), 0, ldc);\
/*Operation*/\
MAKE_GEMM(S, sc::FLOAT_TYPE, cl_float) if(AT && BT)\
MAKE_GEMM(D, sc::DOUBLE_TYPE, cl_double) return execute(handle, assign(dC, a*dot(dA.T, dB.T) + b*dC));\
else if(AT && !BT)\
return execute(handle, assign(dC, a*dot(dA.T, dB) + b*dC));\
else if(!AT && BT)\
return execute(handle, assign(dC, a*dot(dA, dB.T) + b*dC));\
else\
return execute(handle, assign(dC, a*dot(dA, dB) + b*dC));\
}\
\
void cublas ## TYPE_CHAR ## gemm (char transa, char transb, int m, int n, int k,\
TYPE_CU alpha, const TYPE_CU *A, int lda,\
const TYPE_CU *B, int ldb, TYPE_CU beta, TYPE_CU *C,\
int ldc)\
{ cublas ## TYPE_CHAR ## gemm_v2(dft_handle, cvt_trans(transa), cvt_trans(transb), m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc); }\
MAKE_GEMM(S, sc::FLOAT_TYPE, cl_float)
MAKE_GEMM(D, sc::DOUBLE_TYPE, cl_double)
} }

View File

@@ -73,7 +73,7 @@ def main():
libraries += ['gnustl_shared'] libraries += ['gnustl_shared']
#Source files #Source files
src = 'src/lib/random/rand.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/object.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/gemm.cpp src/lib/jit/generation/base.cpp src/lib/runtime/execute.cpp src/lib/runtime/database.cpp src/lib/runtime/profiles.cpp src/lib/runtime/predictors/random_forest.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/api/blas/clBLAS.cpp src/lib/api/blas/cublas.cpp src/lib/exception/api.cpp src/lib/exception/driver.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']] src = 'src/lib/runtime/predictors/random_forest.cpp src/lib/runtime/profiles.cpp src/lib/runtime/database.cpp src/lib/runtime/execute.cpp src/lib/exception/driver.cpp src/lib/exception/api.cpp src/lib/random/rand.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/base.cpp src/lib/jit/generation/gemm.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/object.cpp src/lib/value_scalar.cpp src/lib/array.cpp src/lib/api/blas/cublas.cpp src/lib/api/blas/clBLAS.cpp src/lib/driver/dispatch.cpp src/lib/driver/kernel.cpp src/lib/driver/backend.cpp src/lib/driver/platform.cpp src/lib/driver/buffer.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/device.cpp src/lib/driver/program_cache.cpp src/lib/driver/check.cpp src/lib/driver/command_queue.cpp src/lib/driver/handle.cpp src/lib/driver/context.cpp src/lib/driver/program.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
boostsrc = 'external/boost/libs/' boostsrc = 'external/boost/libs/'
for s in ['numpy','python','smart_ptr','system','thread']: for s in ['numpy','python','smart_ptr','system','thread']:
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x] src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]