#include #include #include "common.hpp" #include "isaac/array.h" #include "clBLAS.h" #include "cublas.h" namespace sc = isaac; typedef sc::int_t int_t; template void test_impl(T epsilon, simple_vector_base & cx, simple_vector_base & cy, sc::array_base & x, sc::array_base & y, interface_t interf) { using namespace std; sc::driver::Context const & ctx = x.context(); int_t N = cx.size(); sc::driver::CommandQueue queue = sc::driver::backend::queues::get(ctx,0); sc::array scratch(N, x.dtype()); unsigned int failure_count = 0; isaac::numeric_type dtype = sc::to_numeric_type::value; T cs = 0; T tmp = 0; isaac::scalar ds(dtype, ctx); #define RUN_TEST(NAME, CPU_REDUCTION, INIT, ASSIGNMENT, GPU_REDUCTION) \ cout << NAME "..." << flush;\ cs = INIT;\ for(int_t i = 0 ; i < N ; ++i)\ CPU_REDUCTION;\ cs= ASSIGNMENT ;\ GPU_REDUCTION;\ queue.synchronize();\ tmp = ds;\ if(std::isnan((T)tmp) || (std::abs(cs - tmp)/std::max(cs, tmp)) > epsilon)\ {\ failure_count++;\ cout << " [Failure!]" << endl;\ }\ else\ cout << endl; if(ctx.backend()==sc::driver::OPENCL && interf==clBLAS) { cl_command_queue clqueue = queue.handle().cl(); RUN_TEST("DOT", cs+=cx[i]*cy[i], 0, cs, BLAS::F(clblasSdot, clblasDdot)(N, CHANDLE(ds), 0, CHANDLE(x), OFF(x), INC(x), CHANDLE(y), OFF(y), INC(y), CHANDLE(scratch), 1, &clqueue, 0, NULL, NULL)); RUN_TEST("ASUM", cs+=std::fabs(cx[i]), 0, cs, BLAS::F(clblasSasum, clblasDasum)(N, CHANDLE(ds), 0, CHANDLE(x), OFF(x), INC(x), CHANDLE(scratch), 1, &clqueue, 0, NULL, NULL)); } if(ctx.backend()==sc::driver::CUDA && interf==cuBLAS) { RUN_TEST("DOT", cs+=cx[i]*cy[i], 0, cs, ds = BLAS::F(cublasSdot, cublasDdot)(N, (T*)CUHANDLE(x) + OFF(x), INC(x), (T*)CUHANDLE(y) + OFF(y), INC(y))); RUN_TEST("ASUM", cs+=std::fabs(cx[i]), 0, cs, ds = BLAS::F(cublasSasum, cublasDasum)(N, (T*)CUHANDLE(x) + OFF(x), INC(x))); } if(interf==CPP) { RUN_TEST("s = x'.y", cs+=cx[i]*cy[i], 0, cs, ds = dot(x,y)); RUN_TEST("s = exp(x'.y)", cs += cx[i]*cy[i], 0, std::exp(cs), ds = exp(dot(x,y))); RUN_TEST("s = 1 + x'.y", cs += cx[i]*cy[i], 0, 1 + cs, ds = 1 + dot(x,y)); RUN_TEST("s = x'.y + y'.y", cs+= cx[i]*cy[i] + cy[i]*cy[i], 0, cs, ds = dot(x,y) + dot(y,y)); RUN_TEST("s = max(x)", cs = std::max(cs, cx[i]), std::numeric_limits::min(), cs, ds = max(x)); RUN_TEST("s = min(x)", cs = std::min(cs, cx[i]), std::numeric_limits::max(), cs, ds = min(x)); } #undef RUN_TEST if(failure_count > 0) exit(EXIT_FAILURE); } template void test(T epsilon, sc::driver::Context const & ctx) { int_t N = 10007; int_t SUBN = 7; INIT_VECTOR(N, SUBN, 0, 1, cx, x, ctx); INIT_VECTOR(N, SUBN, 0, 1, cy, y, ctx); std::cout << "> standard..." << std::endl; test_impl(epsilon, cx, cy, x, y, clBLAS); test_impl(epsilon, cx, cy, x, y, cuBLAS); test_impl(epsilon, cx, cy, x, y, CPP); std::cout << "> slice..." << std::endl; test_impl(epsilon, cx_s, cy_s, x_s, y_s, clBLAS); test_impl(epsilon, cx_s, cy_s, x_s, y_s, cuBLAS); test_impl(epsilon, cx_s, cy_s, x_s, y_s, CPP); } int main() { clblasSetup(); std::list data; sc::driver::backend::contexts::get(data); for(isaac::driver::Context const * context : data) { sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device(); std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "---" << std::endl; std::cout << ">> float" << std::endl; test(eps_float, *context); if(device.fp64_support()) { std::cout << ">> double" << std::endl; test(eps_double, *context); } std::cout << "---" << std::endl; } clblasTeardown(); return EXIT_SUCCESS; }