Code quality: bugfix in bench/test to not call clBLAS on the CUDA backend

This commit is contained in:
Philippe Tillet
2015-08-26 14:12:50 -04:00
parent 9da87bee51
commit 69c11d16cc
6 changed files with 62 additions and 42 deletions

View File

@@ -145,6 +145,7 @@ void bench(sc::numeric_type dtype, std::string operation)
BENCHMARK_ISAAC(y = sc::control(x + alpha*y, sc::execution_options_type(0, &events)), 3*N*dtsize/t)
/* clblas */
#ifdef BENCH_CLBLAS
if(A.context().backend()==sc::driver::OPENCL)
BENCHMARK_CLBLAS(clblasSaxpy(N, alpha, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue), 0, NULL, &event), 3*N*dtsize/t);
#endif
/* BLAS */
@@ -235,6 +236,7 @@ void bench(sc::numeric_type dtype, std::string operation)
#endif
BENCHMARK_ISAAC(y = sc::control(AT?dot(A.T(),x):dot(A,x), sc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t);
#ifdef BENCH_CLBLAS
if(A.context().backend()==sc::driver::OPENCL)
BENCHMARK_CLBLAS(clblasSgemv(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, As1, As2, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(x.data()), 0, 1, 0, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue),0, NULL, &event), (M*N + M + N)*dtsize/t)
#endif
#ifdef BENCH_CBLAS
@@ -324,6 +326,7 @@ void bench(sc::numeric_type dtype, std::string operation)
BENCHMARK_ISAAC(C = sc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(false)), (double)2*M*N*K/t);
/* clblas */
#ifdef BENCH_CLBLAS
if(A.context().backend()==sc::driver::OPENCL)
BENCHMARK_CLBLAS(clblasSgemm(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, BT?clblasTrans:clblasNoTrans, M, N, K, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(B.data()), 0, ldb,
0, CL_HANDLE(C.data()), 0, ldc, 1, &CL_HANDLE(queue),0, NULL, &event), (double)2*M*N*K/t)
#endif

View File

@@ -1,6 +1,8 @@
#include "isaac/driver/handle.h"
#include <cassert>
#include <memory>
#include "isaac/driver/handle.h"
namespace isaac
{
@@ -98,21 +100,29 @@ Handle<CLType, CUType>::~Handle()
template<class CLType, class CUType>
CLType & Handle<CLType, CUType>::cl()
{ return *cl_; }
{
assert(backend_==OPENCL);
return *cl_;
}
template<class CLType, class CUType>
CLType const & Handle<CLType, CUType>::cl() const
{ return *cl_; }
{
assert(backend_==OPENCL);
return *cl_;
}
/// Mutable accessor for the CUDA-side handle.
/// Dereferences cu_ and returns the result by reference. The assert checks
/// that backend_ equals CUDA before the dereference; as with any assert,
/// the check is compiled out when NDEBUG is defined.
template<class CLType, class CUType>
CUType & Handle<CLType, CUType>::cu()
{
assert(backend_==CUDA);
return *cu_;
}
/// Read-only accessor for the CUDA-side handle.
/// Const overload: dereferences cu_ and returns a const reference. The
/// assert checks that backend_ equals CUDA before the dereference; as with
/// any assert, the check is compiled out when NDEBUG is defined.
template<class CLType, class CUType>
CUType const & Handle<CLType, CUType>::cu() const
{
assert(backend_==CUDA);
return *cu_;
}

View File

@@ -72,7 +72,7 @@ def main():
libraries += ['gnustl_shared']
#Source files
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/wrap/clBLAS.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
src = 'src/lib/exception/operation_not_supported.cpp src/lib/exception/unknown_datatype.cpp src/lib/value_scalar.cpp src/lib/driver/check.cpp src/lib/driver/ndrange.cpp src/lib/driver/platform.cpp src/lib/driver/backend.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/event.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/device.cpp src/lib/driver/program_cache.cpp src/lib/driver/buffer.cpp src/lib/driver/context.cpp src/lib/driver/dispatch.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/stream.cpp src/lib/kernels/keywords.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/binder.cpp src/lib/kernels/parse.cpp src/lib/wrap/clBLAS.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/io.cpp src/lib/symbolic/preset.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
boostsrc = 'external/boost/libs/'
for s in ['numpy','python','smart_ptr','system','thread']:
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]

View File

@@ -10,7 +10,7 @@ typedef isaac::int_t int_t;
template<typename T>
void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T>& cy, simple_vector_base<T>& cz,
sc::array& x, sc::array& y, sc::array& z)
sc::array& x, sc::array& y, sc::array& z, interface_t interf)
{
using namespace std;
@@ -18,7 +18,6 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
sc::numeric_type dtype = x.dtype();
sc::driver::Context const & context = x.context();
sc::driver::CommandQueue queue = sc::driver::backend::queues::get(context,0);
cl_command_queue clqueue = queue.handle().cl();
int_t N = cz.size();
T aa = static_cast<T>(-4.3);
@@ -30,7 +29,7 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
#define CONVERT
#define RUN_TEST_VECTOR_AXPY(NAME, CPU_LOOP, GPU_EXPR) \
{\
std::cout << PREFIX << " " << NAME "..." << std::flush;\
std::cout << NAME "..." << std::flush;\
for(int_t i = 0 ; i < N ; ++i)\
CPU_LOOP;\
GPU_EXPR;\
@@ -46,8 +45,11 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
std::cout << std::endl;\
}
if(queue.device().backend()==sc::driver::OPENCL){
#define PREFIX "[C]"
if(queue.device().backend()==sc::driver::OPENCL && interf==clBLAS)
{
cl_command_queue clqueue = queue.handle().cl();
RUN_TEST_VECTOR_AXPY("AXPY", cz[i] = a*cx[i] + cz[i], BLAS<T>::F(clblasSaxpy, clblasDaxpy)(N, a, CHANDLE(x), x.start()[0], x.stride()[0],
CHANDLE(z), z.start()[0], z.stride()[0],
1, &clqueue, 0, NULL, NULL));
@@ -58,10 +60,8 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
RUN_TEST_VECTOR_AXPY("SCAL", cz[i] = a*cz[i], BLAS<T>::F(clblasSscal, clblasDscal)(N, a, CHANDLE(z), z.start()[0], z.stride()[0],
1, &clqueue, 0, NULL, NULL));
#undef PREFIX
}
#define PREFIX "[C++]"
RUN_TEST_VECTOR_AXPY("z = 0", cz[i] = 0, z = zeros(N, 1, dtype, context))
RUN_TEST_VECTOR_AXPY("z = x", cz[i] = cx[i], z = x)
RUN_TEST_VECTOR_AXPY("z = -x", cz[i] = -cx[i], z = -x)
@@ -128,14 +128,16 @@ void test_impl(T epsilon, sc::driver::Context const & ctx)
INIT_VECTOR(N, SUBN, 3, 2, cz, z, ctx);
#define TEST_OPERATIONS(TYPE)\
#define TEST_OPERATIONS(TYPE, INTERF)\
test_element_wise_vector(epsilon, cx_ ## TYPE, cy_ ## TYPE, cz_ ## TYPE,\
x_ ## TYPE, y_ ## TYPE, z_ ## TYPE);\
x_ ## TYPE, y_ ## TYPE, z_ ## TYPE, INTERF);\
std::cout << "> standard..." << std::endl;
TEST_OPERATIONS(full);
TEST_OPERATIONS(full, clBLAS);
TEST_OPERATIONS(full, CPP);
std::cout << "> slice..." << std::endl;
TEST_OPERATIONS(slice);
TEST_OPERATIONS(slice, clBLAS);
TEST_OPERATIONS(slice, CPP);
}
int main()

View File

@@ -10,13 +10,12 @@ typedef sc::int_t int_t;
template<typename T>
void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T> & cy,
sc::array & x, sc::array & y)
sc::array & x, sc::array & y, interface_t interf)
{
using namespace std;
sc::driver::Context const & ctx = x.context();
int_t N = cx.size();
sc::driver::CommandQueue queue = sc::driver::backend::queues::get(ctx,0);
cl_command_queue clqueue = queue.handle().cl();
sc::array scratch(N, x.dtype());
unsigned int failure_count = 0;
@@ -28,7 +27,7 @@ void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T
isaac::scalar ds(dtype, ctx);
#define RUN_TEST(NAME, CPU_REDUCTION, INIT, ASSIGNMENT, GPU_REDUCTION) \
cout << PREFIX << " " << NAME "..." << flush;\
cout << NAME "..." << flush;\
cs = INIT;\
for(int_t i = 0 ; i < N ; ++i)\
CPU_REDUCTION;\
@@ -44,14 +43,18 @@ void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T
else\
cout << endl;
#define PREFIX "[C]"
if(ctx.backend()==sc::driver::OPENCL && interf==clBLAS)
{
cl_command_queue clqueue = queue.handle().cl();
RUN_TEST("DOT", cs+=cx[i]*cy[i], 0, cs, BLAS<T>::F(clblasSdot, clblasDdot)(N, CHANDLE(ds), 0, CHANDLE(x), x.start()[0], x.stride()[0],
CHANDLE(y), y.start()[0], y.stride()[0],
CHANDLE(scratch), 1, &clqueue, 0, NULL, NULL));
RUN_TEST("ASUM", cs+=std::fabs(cx[i]), 0, cs, BLAS<T>::F(clblasSasum, clblasDasum)(N, CHANDLE(ds), 0, CHANDLE(x), x.start()[0], x.stride()[0],
CHANDLE(scratch), 1, &clqueue, 0, NULL, NULL));
#undef PREFIX
#define PREFIX "[C++]"
}
RUN_TEST("s = x'.y", cs+=cx[i]*cy[i], 0, cs, ds = dot(x,y));
RUN_TEST("s = exp(x'.y)", cs += cx[i]*cy[i], 0, std::exp(cs), ds = exp(dot(x,y)));
@@ -77,14 +80,16 @@ void test_impl(T epsilon, sc::driver::Context const & ctx)
INIT_VECTOR(N, SUBN, 0, 1, cx, x, ctx);
INIT_VECTOR(N, SUBN, 0, 1, cy, y, ctx);
#define TEST_OPERATIONS(TYPE)\
#define TEST_OPERATIONS(TYPE, ITF)\
test_reduction(epsilon, cx_ ## TYPE, cy_ ## TYPE,\
x_ ## TYPE, y_ ## TYPE);\
x_ ## TYPE, y_ ## TYPE, ITF);\
std::cout << "> standard..." << std::endl;
TEST_OPERATIONS(full);
TEST_OPERATIONS(full, clBLAS);
TEST_OPERATIONS(full, CPP);
std::cout << "> slice..." << std::endl;
TEST_OPERATIONS(slice);
TEST_OPERATIONS(slice, clBLAS);
TEST_OPERATIONS(slice, CPP);
}

View File

@@ -47,7 +47,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
std::cout << std::endl;
if(interf==clBLAS)
if(y.context().backend()==sc::driver::OPENCL && interf==clBLAS)
{
cl_command_queue clqueue = queue.handle().cl();