diff --git a/include/isaac/model/model.h b/include/isaac/model/model.h index 449299042..54c4700f8 100644 --- a/include/isaac/model/model.h +++ b/include/isaac/model/model.h @@ -23,18 +23,22 @@ namespace isaac driver::Program& init(controller const &); public: - model(expression_type, numeric_type, predictors::random_forest const &, std::vector< tools::shared_ptr > const &, driver::CommandQueue &); - model(expression_type, numeric_type, base const &, driver::CommandQueue &); + model(expression_type, numeric_type, predictors::random_forest const &, std::vector< tools::shared_ptr > const &, driver::CommandQueue const &); + model(expression_type, numeric_type, base const &, driver::CommandQueue const &); void execute(controller const &); templates_container const & templates() const; + + void test() const + { std::cout << queue_.device().backend() << std::endl;} + private: templates_container templates_; template_pointer fallback_; tools::shared_ptr predictor_; std::map, int> hardcoded_; std::map > > programs_; - driver::CommandQueue & queue_; + driver::CommandQueue queue_; }; typedef std::map, tools::shared_ptr > model_map_t; diff --git a/include/isaac/symbolic/expression.h b/include/isaac/symbolic/expression.h index 79b8dc453..26a2d135b 100644 --- a/include/isaac/symbolic/expression.h +++ b/include/isaac/symbolic/expression.h @@ -238,7 +238,7 @@ struct execution_options_type { if(queue_) return *queue_; - return (driver::CommandQueue &)driver::queues[context][queue_id_]; + return driver::queues[context][queue_id_]; } std::list* events; diff --git a/lib/driver/device.cpp b/lib/driver/device.cpp index d6a77800b..6f96d2d71 100644 --- a/lib/driver/device.cpp +++ b/lib/driver/device.cpp @@ -134,9 +134,11 @@ std::string Device::extensions() const switch(backend_) { #ifdef ISAAC_WITH_CUDA - case CUDA: return ""; + case CUDA: + return ""; #endif - case OPENCL: return h_.cl->getInfo(); + case OPENCL: + return h_.cl->getInfo(); default: throw; } } diff --git a/lib/model/model.cpp b/lib/model/model.cpp index f29f180dd..30a83b0da 100644 --- a/lib/model/model.cpp +++ b/lib/model/model.cpp @@ -86,12 +86,12 @@ driver::Program& model::init(controller const & expressions) return *program; } -model::model(expression_type etype, numeric_type dtype, predictors::random_forest const & predictor, std::vector< tools::shared_ptr > const & templates, driver::CommandQueue & queue) : +model::model(expression_type etype, numeric_type dtype, predictors::random_forest const & predictor, std::vector< tools::shared_ptr > const & templates, driver::CommandQueue const & queue) : templates_(templates), fallback_(fallbacks[std::make_pair(etype, dtype)]), predictor_(new predictors::random_forest(predictor)), queue_(queue) {} -model::model(expression_type etype, numeric_type dtype, base const & tp, driver::CommandQueue & queue) : templates_(1,tp.clone()), fallback_(fallbacks[std::make_pair(etype, dtype)]), queue_(queue) +model::model(expression_type etype, numeric_type dtype, base const & tp, driver::CommandQueue const & queue) : templates_(1,tp.clone()), fallback_(fallbacks[std::make_pair(etype, dtype)]), queue_(queue) {} void model::execute(controller const & expr) diff --git a/lib/wrap/clBLAS.cpp b/lib/wrap/clBLAS.cpp index d719843bd..8c86c06c9 100644 --- a/lib/wrap/clBLAS.cpp +++ b/lib/wrap/clBLAS.cpp @@ -45,107 +45,136 @@ extern "C" //***************** //BLAS1 //***************** - clblasStatus clblasSaxpy(size_t N, cl_float alpha, - const cl_mem mx, size_t offx, int incx, - cl_mem my, size_t offy, int incy, - cl_uint numCommandQueues, cl_command_queue *commandQueues, - cl_uint numEventsInWaitList, const cl_event *eventWaitList, - cl_event *events) - { - is::array x(N, is::FLOAT_TYPE, cl::Buffer(mx), offx, incx); - clRetainMemObject(mx); - is::array y(N, is::FLOAT_TYPE, cl::Buffer(my), offy, incy); - clRetainMemObject(my); - execute(is::detail::assign(y, x + alpha*y), y.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events); - return clblasSuccess; + + //AXPY + #define MAKE_AXPY(TYPE_CHAR, TYPE_ISAAC, TYPE_CL) \ + clblasStatus clblas ## TYPE_CHAR ## axpy(size_t N, TYPE_CL alpha, \ + const cl_mem mx, size_t offx, int incx, \ + cl_mem my, size_t offy, int incy, \ + cl_uint numCommandQueues, cl_command_queue *commandQueues, \ + cl_uint numEventsInWaitList, const cl_event *eventWaitList, \ + cl_event *events) \ + { \ + is::array x(N, TYPE_ISAAC, cl::Buffer(mx), offx, incx); \ + clRetainMemObject(mx); \ + is::array y(N, TYPE_ISAAC, cl::Buffer(my), offy, incy); \ + clRetainMemObject(my); \ + execute(is::detail::assign(y, x + alpha*y), y.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events); \ + return clblasSuccess; \ } - clblasStatus clblasSscal(size_t N, cl_float alpha, - cl_mem mx, size_t offx, int incx, - cl_uint numCommandQueues, cl_command_queue *commandQueues, - cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events) - { - is::array x(N, is::FLOAT_TYPE, cl::Buffer(mx), offx, incx); - clRetainMemObject(mx); - execute(is::detail::assign(x, alpha*x), x.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events); - return clblasSuccess; + MAKE_AXPY(S, is::FLOAT_TYPE, cl_float) + MAKE_AXPY(D, is::DOUBLE_TYPE, cl_double) + + //SCAL + #define MAKE_SCAL(TYPE_CHAR, TYPE_ISAAC, TYPE_CL) \ + clblasStatus clblas ## TYPE_CHAR ## scal(size_t N, TYPE_CL alpha,\ + cl_mem mx, size_t offx, int incx,\ + cl_uint numCommandQueues, cl_command_queue *commandQueues,\ + cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events)\ + {\ + is::array x(N, TYPE_ISAAC, cl::Buffer(mx), offx, incx);\ + clRetainMemObject(mx);\ + execute(is::detail::assign(x, alpha*x), x.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\ + return clblasSuccess;\ } - clblasStatus clblasScopy(size_t N, - const cl_mem mx, size_t offx, int incx, - cl_mem my, size_t offy, int incy, - cl_uint numCommandQueues, cl_command_queue *commandQueues, - cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events) - { - const is::array x(N, is::FLOAT_TYPE, cl::Buffer(mx), offx, incx); - clRetainMemObject(mx); - is::array y(N, is::FLOAT_TYPE, cl::Buffer(my), offy, incy); - clRetainMemObject(my); - execute(is::detail::assign(y, x), y.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events); - return clblasSuccess; + MAKE_SCAL(S, is::FLOAT_TYPE, cl_float) + MAKE_SCAL(D, is::DOUBLE_TYPE, cl_double) + + //COPY + #define MAKE_COPY(TYPE_CHAR, TYPE_ISAAC, TYPE_CL)\ + clblasStatus clblas ## TYPE_CHAR ## copy(size_t N,\ + const cl_mem mx, size_t offx, int incx,\ + cl_mem my, size_t offy, int incy,\ + cl_uint numCommandQueues, cl_command_queue *commandQueues,\ + cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events)\ + {\ + const is::array x(N, TYPE_ISAAC, cl::Buffer(mx), offx, incx);\ + clRetainMemObject(mx);\ + is::array y(N, TYPE_ISAAC, cl::Buffer(my), offy, incy);\ + clRetainMemObject(my);\ + execute(is::detail::assign(y, x), y.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\ + return clblasSuccess;\ } - clblasStatus clblasSdot(size_t N, cl_mem dotProduct, size_t offDP, - const cl_mem mx, size_t offx, int incx, - const cl_mem my, size_t offy, int incy, - cl_mem /*scratchBuff*/, cl_uint numCommandQueues, - cl_command_queue *commandQueues, cl_uint numEventsInWaitList, - const cl_event *eventWaitList, cl_event *events) - { - is::array x(N, is::FLOAT_TYPE, cl::Buffer(mx), offx, incx); - clRetainMemObject(mx); - is::array y(N, is::FLOAT_TYPE, cl::Buffer(my), offy, incy); - clRetainMemObject(my); - is::scalar s(is::FLOAT_TYPE, cl::Buffer(dotProduct), offDP); - clRetainMemObject(dotProduct); - execute(is::detail::assign(s, dot(x,y)), s.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events); - return clblasSuccess; + MAKE_COPY(S, is::FLOAT_TYPE, cl_float) + MAKE_COPY(D, is::DOUBLE_TYPE, cl_double) + + //DOT + #define MAKE_DOT(TYPE_CHAR, TYPE_ISAAC, TYPE_CL) \ + clblasStatus clblas ## TYPE_CHAR ## dot(size_t N, cl_mem dotProduct, size_t offDP, \ + const cl_mem mx, size_t offx, int incx, \ + const cl_mem my, size_t offy, int incy, \ + cl_mem /*scratchBuff*/, cl_uint numCommandQueues, \ + cl_command_queue *commandQueues, cl_uint numEventsInWaitList, \ + const cl_event *eventWaitList, cl_event *events) \ + { \ + is::array x(N, TYPE_ISAAC, cl::Buffer(mx), offx, incx); \ + clRetainMemObject(mx); \ + is::array y(N, TYPE_ISAAC, cl::Buffer(my), offy, incy); \ + clRetainMemObject(my); \ + is::scalar s(TYPE_ISAAC, cl::Buffer(dotProduct), offDP); \ + clRetainMemObject(dotProduct); \ + execute(is::detail::assign(s, dot(x,y)), s.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events); \ + return clblasSuccess; \ } - clblasStatus clblasSasum(size_t N, cl_mem asum, size_t offAsum, - const cl_mem mx, size_t offx, int incx, - cl_mem /*scratchBuff*/, cl_uint numCommandQueues, cl_command_queue *commandQueues, - cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events) - { - is::array x(N, is::FLOAT_TYPE, cl::Buffer(mx), offx, incx); - clRetainMemObject(mx); - is::scalar s(is::FLOAT_TYPE, cl::Buffer(asum), offAsum); - clRetainMemObject(asum); - execute(is::detail::assign(s, sum(abs(x))), s.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events); - return clblasSuccess; + MAKE_DOT(S, is::FLOAT_TYPE, cl_float) + MAKE_DOT(D, is::DOUBLE_TYPE, cl_double) + + //ASUM + #define MAKE_ASUM(TYPE_CHAR, TYPE_ISAAC, TYPE_CL) \ + clblasStatus clblas ## TYPE_CHAR ## asum(size_t N, cl_mem asum, size_t offAsum, \ + const cl_mem mx, size_t offx, int incx,\ + cl_mem /*scratchBuff*/, cl_uint numCommandQueues, cl_command_queue *commandQueues,\ + cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events)\ + {\ + is::array x(N, TYPE_ISAAC, cl::Buffer(mx), offx, incx);\ + clRetainMemObject(mx);\ + is::scalar s(TYPE_ISAAC, cl::Buffer(asum), offAsum);\ + clRetainMemObject(asum);\ + execute(is::detail::assign(s, sum(abs(x))), s.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\ + return clblasSuccess;\ } + MAKE_ASUM(S, is::FLOAT_TYPE, cl_float) + MAKE_ASUM(D, is::DOUBLE_TYPE, cl_double) + //***************** //BLAS2 //***************** - clblasStatus clblasSgemv(clblasOrder order, clblasTranspose transA, - size_t M, size_t N, - cl_float alpha, const cl_mem mA, size_t offA, size_t lda, - const cl_mem mx, size_t offx, int incx, - cl_float beta, cl_mem my, size_t offy, int incy, - cl_uint numCommandQueues, cl_command_queue *commandQueues, - cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events) - { - //A - is::int_t As1 = M, As2 = N; - if(transA==clblasTrans) std::swap(As1, As2); - is::array A(As1, As2, is::FLOAT_TYPE, cl::Buffer(mA), offA, lda); - clRetainMemObject(mA); - //x - is::array x(N, is::FLOAT_TYPE, cl::Buffer(mx), offx, incx); - clRetainMemObject(mx); - //y - is::array y(N, is::FLOAT_TYPE, cl::Buffer(my), offy, incy); - clRetainMemObject(my); - //Operation - is::driver::Context const & context = A.context(); - if(transA==clblasTrans) - execute(is::detail::assign(y, alpha*dot(A.T(), x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events); - else - execute(is::detail::assign(y, alpha*dot(A, x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events); - return clblasSuccess; + #define MAKE_GEMV(TYPE_CHAR, TYPE_ISAAC, TYPE_CL) \ + clblasStatus clblas ## TYPE_CHAR ## gemv(clblasOrder order, clblasTranspose transA,\ + size_t M, size_t N,\ + TYPE_CL alpha, const cl_mem mA, size_t offA, size_t lda,\ + const cl_mem mx, size_t offx, int incx,\ + TYPE_CL beta, cl_mem my, size_t offy, int incy,\ + cl_uint numCommandQueues, cl_command_queue *commandQueues,\ + cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events)\ + {\ + is::int_t As1 = M, As2 = N;\ + if(transA==clblasTrans) std::swap(As1, As2);\ + is::array A(As1, As2, TYPE_ISAAC, cl::Buffer(mA), offA, lda);\ + clRetainMemObject(mA);\ + \ + is::array x(N, TYPE_ISAAC, cl::Buffer(mx), offx, incx);\ + clRetainMemObject(mx);\ + \ + is::array y(N, TYPE_ISAAC, cl::Buffer(my), offy, incy);\ + clRetainMemObject(my);\ + \ + is::driver::Context const & context = A.context();\ + if((transA==clblasTrans) ^ (order==clblasRowMajor))\ + execute(is::detail::assign(y, alpha*dot(A.T(), x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\ + else\ + execute(is::detail::assign(y, alpha*dot(A, x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\ + return clblasSuccess;\ } + MAKE_GEMV(S, is::FLOAT_TYPE, cl_float) + MAKE_GEMV(D, is::DOUBLE_TYPE, cl_double) + //***************** //BLAS3 //***************** diff --git a/tests/linalg/common.hpp b/tests/linalg/common.hpp index 794403992..3f00ff301 100644 --- a/tests/linalg/common.hpp +++ b/tests/linalg/common.hpp @@ -6,6 +6,10 @@ typedef isaac::int_t int_t; +template struct BLAS; +template<> struct BLAS { template static FT F(FT SAXPY, DT ) { return SAXPY; } }; +template<> struct BLAS { template static DT F(FT , DT DAXPY) { return DAXPY; } }; + /*------ Simple Vector ---------*/ template class simple_vector_base diff --git a/tests/linalg/reduction.cpp b/tests/linalg/reduction.cpp index 703d52592..30e202034 100644 --- a/tests/linalg/reduction.cpp +++ b/tests/linalg/reduction.cpp @@ -3,6 +3,7 @@ #include "common.hpp" #include "isaac/array.h" +#include "isaac/wrap/clBLAS.h" namespace ad = isaac; typedef ad::int_t int_t; @@ -14,6 +15,9 @@ void test_reduction(T epsilon, simple_vector_base & cx, simple_vector_base::value; @@ -23,7 +27,7 @@ void test_reduction(T epsilon, simple_vector_base & cx, simple_vector_base & cx, simple_vector_base::F(clblasSdot, clblasDdot)(N, (*ds.data().handle().cl)(), 0, (*x.data().handle().cl)(), x.start()[0], x.stride()[0], + (*y.data().handle().cl)(), y.start()[0], y.stride()[0], + 0, 1, &clqueue, 0, NULL, NULL)); + + RUN_TEST("ASUM", cs+=std::fabs(cx[i]), 0, cs, BLAS::F(clblasSasum, clblasDasum)(N, (*ds.data().handle().cl)(), 0, (*x.data().handle().cl)(), x.start()[0], x.stride()[0], + 0, 1, &clqueue, 0, NULL, NULL)); +#undef PREFIX +#define PREFIX "[C++]" + RUN_TEST("s = x'.y", cs+=cx[i]*cy[i], 0, cs, ds = dot(x,y)); RUN_TEST("s = exp(x'.y)", cs += cx[i]*cy[i], 0, std::exp(cs), ds = exp(dot(x,y))); RUN_TEST("s = 1 + x'.y", cs += cx[i]*cy[i], 0, 1 + cs, ds = 1 + dot(x,y)); diff --git a/tests/linalg/vaxpy.cpp b/tests/linalg/vaxpy.cpp index c39ed4909..91d2955ff 100644 --- a/tests/linalg/vaxpy.cpp +++ b/tests/linalg/vaxpy.cpp @@ -2,6 +2,7 @@ #include #include "common.hpp" #include "isaac/array.h" +#include "isaac/wrap/clBLAS.h" namespace ad = isaac; typedef isaac::int_t int_t; @@ -15,7 +16,8 @@ void test_element_wise_vector(T epsilon, simple_vector_base & cx, simple_vect int failure_count = 0; ad::numeric_type dtype = x.dtype(); ad::driver::Context const & ctx = x.context(); - + ad::driver::CommandQueue queue = ad::driver::queues[ctx][0]; + cl_command_queue clqueue = (*queue.handle().cl)(); int_t N = cz.size(); T aa = 3.12, bb=3.5; @@ -26,7 +28,7 @@ void test_element_wise_vector(T epsilon, simple_vector_base & cx, simple_vect #define CONVERT #define RUN_TEST_VECTOR_AXPY(NAME, CPU_LOOP, GPU_EXPR) \ {\ - std::cout << NAME "..." << std::flush;\ + std::cout << PREFIX << " " << NAME "..." << std::flush;\ for(int_t i = 0 ; i < N ; ++i)\ CPU_LOOP;\ GPU_EXPR;\ @@ -41,8 +43,20 @@ void test_element_wise_vector(T epsilon, simple_vector_base & cx, simple_vect std::cout << std::endl;\ } - RUN_TEST_VECTOR_AXPY("z = 0", cz[i] = 0, z = zeros(N, 1, dtype, ctx)) +#define PREFIX "[C]" + RUN_TEST_VECTOR_AXPY("AXPY", cy[i] = cx[i] + a*cy[i], BLAS::F(clblasSaxpy, clblasDaxpy)(N, a, (*x.data().handle().cl)(), x.start()[0], x.stride()[0], + (*y.data().handle().cl)(), y.start()[0], y.stride()[0], + 1, &clqueue, 0, NULL, NULL)); + RUN_TEST_VECTOR_AXPY("COPY", cy[i] = cx[i], BLAS::F(clblasScopy, clblasDcopy)(N, (*x.data().handle().cl)(), x.start()[0], x.stride()[0], + (*y.data().handle().cl)(), y.start()[0], y.stride()[0], + 1, &clqueue, 0, NULL, NULL)); + + RUN_TEST_VECTOR_AXPY("SCAL", cx[i] = a*cx[i], BLAS::F(clblasSscal, clblasDscal)(N, a, (*x.data().handle().cl)(), x.start()[0], x.stride()[0], + 1, &clqueue, 0, NULL, NULL)); +#undef PREFIX +#define PREFIX "[C++]" + RUN_TEST_VECTOR_AXPY("z = 0", cz[i] = 0, z = zeros(N, 1, dtype, ctx)) RUN_TEST_VECTOR_AXPY("z = x", cz[i] = cx[i], z = x) RUN_TEST_VECTOR_AXPY("z = -x", cz[i] = -cx[i], z = -x) @@ -88,8 +102,10 @@ void test_element_wise_vector(T epsilon, simple_vector_base & cx, simple_vect RUN_TEST_VECTOR_AXPY("z = x 0) exit(EXIT_FAILURE); }