2015-01-12 13:20:53 -05:00
# include <cmath>
# include "common.hpp"
2015-04-29 15:50:57 -04:00
# include "isaac/array.h"
# include "isaac/model/model.h"
2015-06-27 11:44:50 -04:00
# include "isaac/wrap/clBLAS.h"
2015-04-29 15:50:57 -04:00
// Short alias for the isaac namespace; the rest of this file refers to it as ad.
namespace ad = isaac ;
2015-01-12 13:20:53 -05:00
template < typename T >
void test_impl ( T epsilon , simple_matrix_base < T > & cC , simple_matrix_base < T > const & cA , simple_matrix_base < T > const & cB ,
2015-06-27 11:44:50 -04:00
ad : : array & C , ad : : array const & A , ad : : array const & AT , ad : : array const & B , ad : : array const & BT ,
2015-06-27 15:22:26 -04:00
interface_t interface , const char * prefix )
2015-01-12 13:20:53 -05:00
{
int failure_count = 0 ;
2015-04-29 15:50:57 -04:00
ad : : int_t M = C . shape ( ) [ 0 ] ;
ad : : int_t N = C . shape ( ) [ 1 ] ;
ad : : int_t K = A . shape ( ) [ 1 ] ;
2015-01-12 13:20:53 -05:00
2015-06-27 11:44:50 -04:00
T alpha = 1 ;
T beta = 0 ;
ad : : driver : : CommandQueue queue = ad : : driver : : queues [ C . context ( ) ] [ 0 ] ;
2015-01-12 13:20:53 -05:00
for ( int i = 0 ; i < M ; + + i )
{
for ( int j = 0 ; j < N ; + + j )
{
T cij = 0 ;
for ( int k = 0 ; k < K ; + + k )
cij + = cA ( i , k ) * cB ( k , j ) ;
2015-06-30 17:55:57 -04:00
cC ( i , j ) = alpha * cij + beta * cC ( i , j ) ;
2015-01-12 13:20:53 -05:00
}
}
std : : vector < T > cCbuffer ( M * N ) ;
for ( int i = 0 ; i < M ; + + i )
for ( int j = 0 ; j < N ; + + j )
cCbuffer [ i + j * M ] = cC ( i , j ) ;
std : : vector < T > buffer ( M * N ) ;
2015-06-27 11:44:50 -04:00
2015-01-12 13:20:53 -05:00
# define RUN_TEST(NAME, GPU_OP)\
2015-06-27 15:22:26 -04:00
std : : cout < < " [ " < < prefix < < " ] \t " < < NAME < < " ... " < < std : : flush ; \
2015-01-12 13:20:53 -05:00
GPU_OP ; \
2015-06-30 17:55:57 -04:00
queue . synchronize ( ) ; \
2015-01-12 13:20:53 -05:00
ad : : copy ( C , buffer ) ; \
2015-01-29 15:19:40 -05:00
if ( diff ( buffer , cCbuffer , epsilon ) ) \
2015-01-12 13:20:53 -05:00
{ \
failure_count + + ; \
std : : cout < < " [Failure!] " < < std : : endl ; \
} \
else \
std : : cout < < std : : endl ;
2015-06-27 11:44:50 -04:00
if ( interface = = clBLAS )
{
cl_command_queue clqueue = ( * queue . handle ( ) . cl ) ( ) ;
2015-06-27 15:22:26 -04:00
//Row-major
2015-07-02 14:02:31 -04:00
// RUN_TEST("GEMM(ROW, N, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasRowMajor, clblasNoTrans, clblasNoTrans, N, M, K, alpha, CHANDLE(B), OFF(B), LD(B),
// CHANDLE(A), OFF(A), LD(A), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
// RUN_TEST("GEMM(ROW, N, T)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasRowMajor, clblasTrans, clblasNoTrans, N, M, K, alpha, CHANDLE(BT), OFF(BT), LD(BT),
// CHANDLE(A), OFF(A), LD(A), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
// RUN_TEST("GEMM(ROW, T, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasRowMajor, clblasNoTrans, clblasTrans, N, M, K, alpha, CHANDLE(B), OFF(B), LD(B),
// CHANDLE(AT), OFF(AT), LD(AT), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
// RUN_TEST("GEMM(ROW, T, T)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasRowMajor, clblasTrans, clblasTrans, N, M, K, alpha, CHANDLE(BT), OFF(BT), LD(BT),
// CHANDLE(AT), OFF(AT), LD(AT), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
2015-06-27 15:22:26 -04:00
//Column-major
2015-07-07 23:37:53 -07:00
RUN_TEST ( " GEMM(COL, N, N) " , BLAS < T > : : F ( clblasSgemm , clblasDgemm ) ( clblasColumnMajor , clblasNoTrans , clblasNoTrans , M , N , K , alpha , CHANDLE ( A ) , OFF ( A ) , LD ( A ) ,
CHANDLE ( B ) , OFF ( B ) , LD ( B ) , beta , CHANDLE ( C ) , OFF ( C ) , LD ( C ) , 1 , & clqueue , 0 , NULL , NULL ) ) ;
2015-06-30 17:55:57 -04:00
2015-06-27 15:22:26 -04:00
RUN_TEST ( " GEMM(COL, N, T) " , BLAS < T > : : F ( clblasSgemm , clblasDgemm ) ( clblasColumnMajor , clblasNoTrans , clblasTrans , M , N , K , alpha , CHANDLE ( A ) , OFF ( A ) , LD ( A ) ,
2015-06-27 13:53:31 -04:00
CHANDLE ( BT ) , OFF ( BT ) , LD ( BT ) , beta , CHANDLE ( C ) , OFF ( C ) , LD ( C ) , 1 , & clqueue , 0 , NULL , NULL ) ) ;
2015-06-30 17:55:57 -04:00
2015-07-07 23:37:53 -07:00
RUN_TEST ( " GEMM(COL, T, N) " , BLAS < T > : : F ( clblasSgemm , clblasDgemm ) ( clblasColumnMajor , clblasTrans , clblasNoTrans , M , N , K , alpha , CHANDLE ( AT ) , OFF ( AT ) , LD ( AT ) ,
CHANDLE ( B ) , OFF ( B ) , LD ( B ) , beta , CHANDLE ( C ) , OFF ( C ) , LD ( C ) , 1 , & clqueue , 0 , NULL , NULL ) ) ;
RUN_TEST ( " GEMM(COL, T, T) " , BLAS < T > : : F ( clblasSgemm , clblasDgemm ) ( clblasColumnMajor , clblasTrans , clblasTrans , M , N , K , alpha , CHANDLE ( AT ) , OFF ( AT ) , LD ( AT ) ,
CHANDLE ( BT ) , OFF ( BT ) , LD ( BT ) , beta , CHANDLE ( C ) , OFF ( C ) , LD ( C ) , 1 , & clqueue , 0 , NULL , NULL ) ) ;
2015-06-27 11:44:50 -04:00
}
else
{
RUN_TEST ( " C = A * B " , C = dot ( A , B ) )
RUN_TEST ( " C = A' * B " , C = dot ( trans ( AT ) , B ) )
RUN_TEST ( " C = A * B' " , C = dot ( A , trans ( BT ) ) )
RUN_TEST ( " C = A' * B' " , C = dot ( trans ( AT ) , trans ( BT ) ) )
}
2015-01-12 13:20:53 -05:00
if ( failure_count > 0 )
exit ( EXIT_FAILURE ) ;
}
template < typename T >
2015-04-29 15:50:57 -04:00
void test_impl ( T epsilon , ad : : driver : : Context const & ctx )
2015-01-12 13:20:53 -05:00
{
2015-07-09 10:52:54 -04:00
int_t M = 412 ;
int_t N = 248 ;
int_t K = 376 ;
2015-01-12 13:20:53 -05:00
2015-07-09 10:52:54 -04:00
int_t SUBM = 64 ;
int_t SUBN = 64 ;
int_t SUBK = 64 ;
2015-01-12 13:20:53 -05:00
2015-06-29 21:52:50 -07:00
{
2015-07-09 10:52:54 -04:00
INIT_MATRIX ( M , SUBM , 5 , 1 , N , SUBN , 7 , 1 , cC , C , ctx ) ;
INIT_MATRIX ( M , SUBM , 8 , 1 , K , SUBK , 4 , 1 , cA , A , ctx ) ;
INIT_MATRIX ( K , SUBK , 9 , 1 , N , SUBN , 6 , 1 , cB , B , ctx ) ;
// test_impl(epsilon, cC_full, cA_full, cB_full, C_full, A_full, AT_full, B_full, BT_full, clBLAS, "BLAS, FULL");
test_impl ( epsilon , cC_slice , cA_slice , cB_slice , C_slice , A_slice , AT_slice , B_slice , BT_slice , clBLAS , " BLAS, SUB " ) ;
2015-06-29 21:52:50 -07:00
}
2015-06-27 15:22:26 -04:00
2015-06-29 21:52:50 -07:00
{
2015-07-09 10:52:54 -04:00
INIT_MATRIX ( M , SUBM , 5 , 2 , N , SUBN , 7 , 3 , cC , C , ctx ) ;
INIT_MATRIX ( M , SUBM , 8 , 2 , K , SUBK , 4 , 3 , cA , A , ctx ) ;
INIT_MATRIX ( K , SUBK , 9 , 4 , N , SUBN , 6 , 2 , cB , B , ctx ) ;
// test_impl(epsilon, cC_full, cA_full, cB_full, C_full, A_full, AT_full, B_full, BT_full, CPP, "C++, FULL");
// test_impl(epsilon, cC_slice, cA_slice, cB_slice, C_slice, A_slice, AT_slice, B_slice, BT_slice, CPP, "C++, SUB");
2015-06-29 21:52:50 -07:00
}
2015-07-09 10:52:54 -04:00
2015-01-12 13:20:53 -05:00
}
int main ( )
{
2015-06-30 17:55:57 -04:00
clblasSetup ( ) ;
2015-04-29 15:50:57 -04:00
auto data = ad : : driver : : queues . contexts ( ) ;
for ( const auto & elem : data )
2015-01-19 14:40:13 -05:00
{
2015-04-29 15:50:57 -04:00
ad : : driver : : Device device = elem . second [ 0 ] . device ( ) ;
std : : cout < < " Device: " < < device . name ( ) < < " on " < < device . platform ( ) . name ( ) < < " " < < device . platform ( ) . version ( ) < < std : : endl ;
2015-01-19 14:40:13 -05:00
std : : cout < < " --- " < < std : : endl ;
std : : cout < < " >> float " < < std : : endl ;
2015-02-04 22:06:15 -05:00
test_impl < float > ( 1e-4 , elem . first ) ;
2015-01-19 14:40:13 -05:00
std : : cout < < " >> double " < < std : : endl ;
2015-02-04 22:06:15 -05:00
test_impl < double > ( 1e-9 , elem . first ) ;
2015-01-19 14:40:13 -05:00
std : : cout < < " --- " < < std : : endl ;
}
2015-06-30 17:55:57 -04:00
clblasTeardown ( ) ;
2015-01-12 13:20:53 -05:00
return EXIT_SUCCESS ;
}