BLAS: Added row-major support and tests
This commit is contained in:
@@ -153,6 +153,10 @@ extern "C"
|
||||
cl_uint numCommandQueues, cl_command_queue *commandQueues,\
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events)\
|
||||
{\
|
||||
if(order==clblasRowMajor){\
|
||||
std::swap(M, N);\
|
||||
transA = (transA==clblasTrans)?clblasNoTrans:clblasTrans;\
|
||||
}\
|
||||
is::int_t As1 = M, As2 = N;\
|
||||
if(transA==clblasTrans) std::swap(As1, As2);\
|
||||
is::array A(As1, As2, TYPE_ISAAC, cl::Buffer(mA), offA, lda);\
|
||||
@@ -165,7 +169,7 @@ extern "C"
|
||||
clRetainMemObject(my);\
|
||||
\
|
||||
is::driver::Context const & context = A.context();\
|
||||
if((transA==clblasTrans) ^ (order==clblasRowMajor))\
|
||||
if(transA==clblasTrans)\
|
||||
execute(is::detail::assign(y, alpha*dot(A.T(), x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
else\
|
||||
execute(is::detail::assign(y, alpha*dot(A, x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
@@ -181,12 +185,22 @@ extern "C"
|
||||
#define MAKE_GEMM(TYPE_CHAR, TYPE_ISAAC, TYPE_CL) \
|
||||
clblasStatus clblas ## TYPE_CHAR ## gemm(clblasOrder order, clblasTranspose transA, clblasTranspose transB,\
|
||||
size_t M, size_t N, size_t K,\
|
||||
TYPE_CL alpha, const cl_mem mA, size_t offA, size_t lda,\
|
||||
const cl_mem mB, size_t offB, size_t ldb, TYPE_CL beta,\
|
||||
TYPE_CL alpha, const cl_mem cmA, size_t offA, size_t lda,\
|
||||
const cl_mem cmB, size_t offB, size_t ldb, TYPE_CL beta,\
|
||||
cl_mem mC, size_t offC, size_t ldc,\
|
||||
cl_uint numCommandQueues, cl_command_queue *commandQueues,\
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events)\
|
||||
{\
|
||||
cl_mem mA = cmA;\
|
||||
cl_mem mB = cmB;\
|
||||
if(order==clblasRowMajor){\
|
||||
std::swap(mA, mB);\
|
||||
std::swap(offA, offB);\
|
||||
std::swap(lda, ldb);\
|
||||
std::swap(M, N);\
|
||||
transA = (transA==clblasTrans)?clblasNoTrans:clblasTrans;\
|
||||
transB = (transB==clblasTrans)?clblasNoTrans:clblasTrans;\
|
||||
}\
|
||||
is::int_t As1 = M, As2 = K;\
|
||||
is::int_t Bs1 = K, Bs2 = N;\
|
||||
if(transA==clblasTrans) std::swap(As1, As2);\
|
||||
@@ -199,15 +213,13 @@ extern "C"
|
||||
is::array C(M, N, TYPE_ISAAC, cl::Buffer(mC), offC, ldc);\
|
||||
clRetainMemObject(mC);\
|
||||
is::driver::Context const & context = C.context();\
|
||||
bool AeffTrans = (transA==clblasTrans) ^ (order==clblasRowMajor);\
|
||||
bool BeffTrans = (transB==clblasTrans) ^ (order==clblasRowMajor);\
|
||||
/*Operation*/\
|
||||
if(AeffTrans && BeffTrans){\
|
||||
if((transA==clblasTrans) && (transB==clblasTrans)){\
|
||||
execute(is::detail::assign(C, alpha*dot(A.T(), B.T()) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
}\
|
||||
else if(AeffTrans && !BeffTrans)\
|
||||
else if((transA==clblasTrans) && (transB==clblasNoTrans))\
|
||||
execute(is::detail::assign(C, alpha*dot(A.T(), B) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
else if(!AeffTrans && BeffTrans)\
|
||||
else if((transA==clblasNoTrans) && (transB==clblasTrans))\
|
||||
execute(is::detail::assign(C, alpha*dot(A, B.T()) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
else\
|
||||
execute(is::detail::assign(C, alpha*dot(A, B) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
|
@@ -14,7 +14,7 @@ enum interface_t{clBLAS, CPP};
|
||||
|
||||
// Dereferences X.data().handle().cl and calls the result with no arguments.
// The call sites pass the value where the clBLAS wrappers take a cl_mem
// (presumably the raw OpenCL buffer handle -- confirm against the driver types).
// X is parenthesized so that non-trivial argument expressions (e.g. *ptr) bind correctly.
#define CHANDLE(X) (*(X).data().handle().cl)()
|
||||
// Linear offset of X: start()[0] + start()[1] * ld().
// The whole expansion and the argument are parenthesized so the macro can be
// used inside larger expressions (e.g. 2 * OFF(m)) and with non-trivial
// arguments (e.g. OFF(*ptr)) without operator-precedence surprises.
#define OFF(X) ((X).start()[0] + (X).start()[1]*(X).ld())
|
||||
#define LD(X) X.ld()*X.stride()[1]
|
||||
// Leading dimension of X as reported by X.ld().
// The argument is parenthesized so expressions such as LD(*ptr) expand correctly.
#define LD(X) ((X).ld())
|
||||
|
||||
/*------ Simple Vector ---------*/
|
||||
template<class T>
|
||||
|
@@ -9,7 +9,7 @@ namespace ad = isaac;
|
||||
template<typename T>
|
||||
void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> const & cA, simple_matrix_base<T> const & cB,
|
||||
ad::array & C, ad::array const & A, ad::array const & AT, ad::array const & B, ad::array const & BT,
|
||||
interface_t interface)
|
||||
interface_t interface, const char * prefix)
|
||||
{
|
||||
int failure_count = 0;
|
||||
|
||||
@@ -39,10 +39,9 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
|
||||
cCbuffer[i + j*M] = cC(i,j);
|
||||
|
||||
std::vector<T> buffer(M*N);
|
||||
const char * PREFIX = interface==clBLAS?"[BLAS]":"[C++]";
|
||||
|
||||
#define RUN_TEST(NAME, GPU_OP)\
|
||||
std::cout << PREFIX << " " << NAME << "..." << std::flush;\
|
||||
std::cout << "[" << prefix << "] \t" << NAME << "..." << std::flush;\
|
||||
GPU_OP;\
|
||||
ad::copy(C, buffer);\
|
||||
if(diff(buffer, cCbuffer, epsilon))\
|
||||
@@ -56,13 +55,25 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
|
||||
if(interface==clBLAS)
|
||||
{
|
||||
cl_command_queue clqueue = (*queue.handle().cl)();
|
||||
RUN_TEST("GEMM(N, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasNoTrans, clblasNoTrans, M, N, K, alpha, CHANDLE(A), OFF(A), LD(A),
|
||||
|
||||
//Row-major
|
||||
RUN_TEST("GEMM(ROW, N, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasRowMajor, clblasTrans, clblasTrans, N, M, K, alpha, CHANDLE(B), OFF(B), LD(B),
|
||||
CHANDLE(A), OFF(A), LD(A), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
||||
RUN_TEST("GEMM(ROW, N, T)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasRowMajor, clblasTrans, clblasNoTrans, N, M, K, alpha, CHANDLE(BT), OFF(BT), LD(BT),
|
||||
CHANDLE(A), OFF(A), LD(A), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
||||
RUN_TEST("GEMM(ROW, T, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasRowMajor, clblasNoTrans, clblasTrans, N, M, K, alpha, CHANDLE(B), OFF(B), LD(B),
|
||||
CHANDLE(AT), OFF(AT), LD(AT), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
||||
RUN_TEST("GEMM(ROW, T, T)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasRowMajor, clblasNoTrans, clblasNoTrans, N, M, K, alpha, CHANDLE(BT), OFF(BT), LD(BT),
|
||||
CHANDLE(AT), OFF(AT), LD(AT), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
||||
|
||||
//Column-major
|
||||
RUN_TEST("GEMM(COL, N, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasNoTrans, clblasNoTrans, M, N, K, alpha, CHANDLE(A), OFF(A), LD(A),
|
||||
CHANDLE(B), OFF(B), LD(B), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
||||
RUN_TEST("GEMM(N, T)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasNoTrans, clblasTrans, M, N, K, alpha, CHANDLE(A), OFF(A), LD(A),
|
||||
RUN_TEST("GEMM(COL, N, T)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasNoTrans, clblasTrans, M, N, K, alpha, CHANDLE(A), OFF(A), LD(A),
|
||||
CHANDLE(BT), OFF(BT), LD(BT), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
||||
RUN_TEST("GEMM(T, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasTrans, clblasNoTrans, M, N, K, alpha, CHANDLE(AT), OFF(AT), LD(AT),
|
||||
RUN_TEST("GEMM(COL, T, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasTrans, clblasNoTrans, M, N, K, alpha, CHANDLE(AT), OFF(AT), LD(AT),
|
||||
CHANDLE(B), OFF(B), LD(B), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
||||
RUN_TEST("GEMM(T, T)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasTrans, clblasTrans, M, N, K, alpha, CHANDLE(AT), OFF(AT), LD(AT),
|
||||
RUN_TEST("GEMM(COL, T, T)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasTrans, clblasTrans, M, N, K, alpha, CHANDLE(AT), OFF(AT), LD(AT),
|
||||
CHANDLE(BT), OFF(BT), LD(BT), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
||||
}
|
||||
else
|
||||
@@ -89,14 +100,22 @@ void test_impl(T epsilon, ad::driver::Context const & ctx)
|
||||
int_t SUBN = 75;
|
||||
int_t SUBK = 83;
|
||||
|
||||
{
|
||||
INIT_MATRIX(M, SUBM, 5, 1, N, SUBN, 7, 1, cC, C, ctx);
|
||||
INIT_MATRIX(M, SUBM, 8, 1, K, SUBK, 4, 1, cA, A, ctx);
|
||||
INIT_MATRIX(K, SUBK, 9, 1, N, SUBN, 6, 1, cB, B, ctx);
|
||||
test_impl(epsilon, cC_full, cA_full, cB_full, C_full, A_full, AT_full, B_full, BT_full, clBLAS, "BLAS, FULL");
|
||||
test_impl(epsilon, cC_slice, cA_slice, cB_slice, C_slice, A_slice, AT_slice, B_slice, BT_slice, clBLAS, "BLAS, SUB");
|
||||
}
|
||||
|
||||
{
|
||||
INIT_MATRIX(M, SUBM, 5, 2, N, SUBN, 7, 3, cC, C, ctx);
|
||||
INIT_MATRIX(M, SUBM, 8, 2, K, SUBK, 4, 3, cA, A, ctx);
|
||||
INIT_MATRIX(K, SUBK, 9, 4, N, SUBN, 6, 2, cB, B, ctx);
|
||||
std::cout << "full..." << std::endl;
|
||||
test_impl(epsilon, cC_full, cA_full, cB_full, C_full, A_full, AT_full, B_full, BT_full, clBLAS);
|
||||
test_impl(epsilon, cC_full, cA_full, cB_full, C_full, A_full, AT_full, B_full, BT_full, CPP);
|
||||
std::cout << "slice..." << std::endl;
|
||||
test_impl(epsilon, cC_slice, cA_slice, cB_slice, C_slice, A_slice, AT_slice, B_slice, BT_slice, CPP);
|
||||
test_impl(epsilon, cC_full, cA_full, cB_full, C_full, A_full, AT_full, B_full, BT_full, CPP, "C++, FULL");
|
||||
test_impl(epsilon, cC_slice, cA_slice, cB_slice, C_slice, A_slice, AT_slice, B_slice, BT_slice, CPP, "C++, SUB");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int main()
|
||||
|
@@ -7,7 +7,7 @@ namespace ad = isaac;
|
||||
|
||||
template<typename T>
|
||||
void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matrix_base<T> const & cA, simple_vector_base<T> & cx,
|
||||
ad::array & y, ad::array const & A, ad::array & x, interface_t interface)
|
||||
ad::array & y, ad::array const & A, ad::array & x, interface_t interface, const char * prefix)
|
||||
{
|
||||
int failure_count = 0;
|
||||
|
||||
@@ -21,9 +21,8 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
|
||||
ad::driver::CommandQueue queue = ad::driver::queues[y.context()][0];
|
||||
|
||||
T yi = 0, xi = 0;
|
||||
const char * PREFIX = interface==clBLAS?"[BLAS]":"[C++]";
|
||||
#define TEST_OPERATION(NAME, SIZE1, SIZE2, REDUCTION, ASSIGNMENT, GPU_REDUCTION, RES, BUF, CRES)\
|
||||
std::cout << PREFIX << " " << NAME "..." << std::flush;\
|
||||
std::cout << "[" << prefix << "] \t" << NAME "..." << std::flush;\
|
||||
for(int i = 0 ; i < SIZE1 ; ++i)\
|
||||
{\
|
||||
yi = 0;\
|
||||
@@ -42,19 +41,29 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
|
||||
else\
|
||||
std::cout << std::endl;
|
||||
|
||||
ad::int_t offA = A.start()[0] + A.start()[1]*A.ld();
|
||||
|
||||
if(interface==clBLAS)
|
||||
{
|
||||
cl_command_queue clqueue = (*queue.handle().cl)();
|
||||
|
||||
|
||||
TEST_OPERATION("GEMV(ROW, NoTrans)", M, N, yi+=cA(i,j)*cx[j], cy[i] = yi,
|
||||
BLAS<T>::F(clblasSgemv, clblasDgemv)(clblasRowMajor, clblasTrans, N, M, 1, CHANDLE(A), OFF(A), LD(A),
|
||||
CHANDLE(x), x.start()[0], x.stride()[0], 0, CHANDLE(y), y.start()[0], y.stride()[0],
|
||||
1, &clqueue, 0, NULL, NULL), y, bufy, cy);
|
||||
|
||||
TEST_OPERATION("GEMV(ROW, Trans)", N, M, xi+=cA(j,i)*cy[j], cx[i] = xi,
|
||||
BLAS<T>::F(clblasSgemv, clblasDgemv)(clblasRowMajor, clblasNoTrans, M, N, 1, CHANDLE(A), OFF(A), LD(A),
|
||||
CHANDLE(y), y.start()[0], y.stride()[0], 0, CHANDLE(x), x.start()[0], x.stride()[0],
|
||||
1, &clqueue, 0, NULL, NULL), x, bufx, cx);
|
||||
|
||||
TEST_OPERATION("GEMV(COL, NoTrans)", M, N, yi+=cA(i,j)*cx[j], cy[i] = yi,
|
||||
BLAS<T>::F(clblasSgemv, clblasDgemv)(clblasColumnMajor, clblasNoTrans, M, N, 1, CHANDLE(A), offA, A.ld()*A.stride()[1],
|
||||
BLAS<T>::F(clblasSgemv, clblasDgemv)(clblasColumnMajor, clblasNoTrans, M, N, 1, CHANDLE(A), OFF(A), LD(A),
|
||||
CHANDLE(x), x.start()[0], x.stride()[0], 0, CHANDLE(y), y.start()[0], y.stride()[0],
|
||||
1, &clqueue, 0, NULL, NULL), y, bufy, cy);
|
||||
|
||||
TEST_OPERATION("GEMV(COL, Trans)", N, M, xi+=cA(j,i)*cy[j], cx[i] = xi,
|
||||
BLAS<T>::F(clblasSgemv, clblasDgemv)(clblasColumnMajor, clblasTrans, N, M, 1, CHANDLE(A), offA, A.ld()*A.stride()[1],
|
||||
BLAS<T>::F(clblasSgemv, clblasDgemv)(clblasColumnMajor, clblasTrans, N, M, 1, CHANDLE(A), OFF(A), LD(A),
|
||||
CHANDLE(y), y.start()[0], y.stride()[0], 0, CHANDLE(x), x.start()[0], x.stride()[0],
|
||||
1, &clqueue, 0, NULL, NULL), x, bufx, cx);
|
||||
}
|
||||
@@ -78,16 +87,17 @@ void test_impl(T epsilon, ad::driver::Context const & ctx)
|
||||
|
||||
INIT_VECTOR(M, SUBM, 7, 2, cy, y, ctx);
|
||||
INIT_VECTOR(N, SUBN, 5, 3, cx, x, ctx);
|
||||
INIT_MATRIX(M, SUBM, 9, 1, N, SUBN, 8, 4, cA, A, ctx);
|
||||
INIT_MATRIX(M, SUBM, 9, 5, N, SUBN, 8, 4, cAPP, APP, ctx);
|
||||
|
||||
|
||||
std::cout << "full..." << std::endl;
|
||||
test_row_wise_reduction(epsilon, cy_full, cA_full, cx_full, y_full, A_full, x_full, clBLAS);
|
||||
test_row_wise_reduction(epsilon, cy_full, cAPP_full, cx_full, y_full, APP_full, x_full, CPP);
|
||||
std::cout << "slice..." << std::endl;
|
||||
test_row_wise_reduction(epsilon, cy_slice, cA_slice, cx_slice, y_slice, A_slice, x_slice, clBLAS);
|
||||
test_row_wise_reduction(epsilon, cy_slice, cAPP_slice, cx_slice, y_slice, APP_slice, x_slice, CPP);
|
||||
{
|
||||
INIT_MATRIX(M, SUBM, 9, 1, N, SUBN, 8, 1, cA, A, ctx);
|
||||
test_row_wise_reduction(epsilon, cy_full, cA_full, cx_full, y_full, A_full, x_full, clBLAS, "BLAS, FULL");
|
||||
test_row_wise_reduction(epsilon, cy_slice, cA_slice, cx_slice, y_slice, A_slice, x_slice, clBLAS, "BLAS, SUB");
|
||||
}
|
||||
{
|
||||
INIT_MATRIX(M, SUBM, 9, 5, N, SUBN, 8, 4, cA, A, ctx);
|
||||
test_row_wise_reduction(epsilon, cy_full, cA_full, cx_full, y_full, A_full, x_full, CPP, "C++, FULL");
|
||||
test_row_wise_reduction(epsilon, cy_slice, cA_slice, cx_slice, y_slice, A_slice, x_slice, CPP, "C++, SUB");
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
|
Reference in New Issue
Block a user