#include #include "common.hpp" #include "isaac/array.h" #include "isaac/wrap/clBLAS.h" namespace ad = isaac; template void test_row_wise_reduction(T epsilon, simple_vector_base & cy, simple_matrix_base const & cA, simple_vector_base & cx, ad::array & y, ad::array const & A, ad::array & x, interface_t interface, const char * prefix) { int failure_count = 0; ad::int_t M = A.shape()[0]; ad::int_t N = A.shape()[1]; simple_vector bufy(M); simple_vector bufx(N); T alpha = 4.2, beta = 1.8; ad::driver::CommandQueue queue = ad::driver::queues[y.context()][0]; T yi = 0, xi = 0; #define TEST_OPERATION(NAME, SIZE1, SIZE2, REDUCTION, ASSIGNMENT, GPU_REDUCTION, RES, BUF, CRES)\ std::cout << "[" << prefix << "] \t" << NAME "..." << std::flush;\ for(int i = 0 ; i < SIZE1 ; ++i)\ {\ yi = 0;\ xi = 0;\ for(int j = 0 ; j < SIZE2 ; ++j)\ REDUCTION;\ ASSIGNMENT;\ }\ GPU_REDUCTION;\ queue.synchronize();\ ad::copy(RES, BUF.data());\ if(diff(CRES, BUF, epsilon))\ {\ failure_count++;\ std::cout << " [Failure!]" << std::endl;\ }\ else\ std::cout << std::endl; if(interface==clBLAS) { cl_command_queue clqueue = (*queue.handle().cl)(); TEST_OPERATION("GEMV(ROW, NoTrans)", M, N, yi+=cA(i,j)*cx[j], cy[i] = alpha*yi + beta*cy[i], BLAS::F(clblasSgemv, clblasDgemv)(clblasRowMajor, clblasTrans, N, M, alpha, CHANDLE(A), OFF(A), LD(A), CHANDLE(x), x.start()[0], x.stride()[0], beta, CHANDLE(y), y.start()[0], y.stride()[0], 1, &clqueue, 0, NULL, NULL), y, bufy, cy); TEST_OPERATION("GEMV(ROW, Trans)", N, M, xi+=cA(j,i)*cy[j], cx[i] = alpha*xi + beta*cx[i], BLAS::F(clblasSgemv, clblasDgemv)(clblasRowMajor, clblasNoTrans, N, M, alpha, CHANDLE(A), OFF(A), LD(A), CHANDLE(y), y.start()[0], y.stride()[0], beta, CHANDLE(x), x.start()[0], x.stride()[0], 1, &clqueue, 0, NULL, NULL), x, bufx, cx); TEST_OPERATION("GEMV(COL, NoTrans)", M, N, yi+=cA(i,j)*cx[j], cy[i] = alpha*yi + beta*cy[i], BLAS::F(clblasSgemv, clblasDgemv)(clblasColumnMajor, clblasNoTrans, M, N, alpha, CHANDLE(A), OFF(A), LD(A), CHANDLE(x), x.start()[0], x.stride()[0], 0, CHANDLE(y), y.start()[0], y.stride()[0], 1, &clqueue, 0, NULL, NULL), y, bufy, cy); TEST_OPERATION("GEMV(COL, Trans)", N, M, xi+=cA(j,i)*cy[j], cx[i] = alpha*xi + beta*cx[i], BLAS::F(clblasSgemv, clblasDgemv)(clblasColumnMajor, clblasTrans, M, N, alpha, CHANDLE(A), OFF(A), LD(A), CHANDLE(y), y.start()[0], y.stride()[0], beta, CHANDLE(x), x.start()[0], x.stride()[0], 1, &clqueue, 0, NULL, NULL), x, bufx, cx); } else { TEST_OPERATION("y = A.x", M, N, yi+=cA(i,j)*cx[j], cy[i] = yi, y = dot(A,x), y, bufy, cy); TEST_OPERATION("x = A'.y", N, M, xi+=cA(j,i)*cy[j], cx[i] = xi, x = dot(trans(A),y), x, bufx, cx); } if(failure_count>0) exit(EXIT_FAILURE); } template void test_impl(T epsilon, ad::driver::Context const & ctx) { int_t M = 1324; int_t N = 1143; int_t SUBM = 184; int_t SUBN = 145; INIT_VECTOR(M, SUBM, 7, 2, cy, y, ctx); INIT_VECTOR(N, SUBN, 5, 3, cx, x, ctx); { INIT_MATRIX(M, SUBM, 9, 1, N, SUBN, 8, 1, cA, A, ctx); test_row_wise_reduction(epsilon, cy_full, cA_full, cx_full, y_full, A_full, x_full, clBLAS, "BLAS, FULL"); test_row_wise_reduction(epsilon, cy_slice, cA_slice, cx_slice, y_slice, A_slice, x_slice, clBLAS, "BLAS, SUB"); } { INIT_MATRIX(M, SUBM, 9, 5, N, SUBN, 8, 4, cA, A, ctx); test_row_wise_reduction(epsilon, cy_full, cA_full, cx_full, y_full, A_full, x_full, CPP, "C++, FULL"); test_row_wise_reduction(epsilon, cy_slice, cA_slice, cx_slice, y_slice, A_slice, x_slice, CPP, "C++, SUB"); } } int main() { clblasSetup(); auto data = ad::driver::queues.contexts(); for(const auto & elem : data) { ad::driver::Device device = elem.second[0].device(); std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "---" << std::endl; std::cout << ">> float" << std::endl; test_impl(1e-4, elem.first); std::cout << ">> double" << std::endl; test_impl(1e-9, elem.first); std::cout << "---" << std::endl; } clblasTeardown(); return EXIT_SUCCESS; }