diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt index 291a0fc93..9bcf227f3 100644 --- a/bench/CMakeLists.txt +++ b/bench/CMakeLists.txt @@ -1,7 +1,7 @@ set(CMAKE_BUILD_TYPE Release) set(BLAS_DEF) -set(BLAS_LIBS atidlas) +set(BLAS_LIBS atidlas ${OPENCL_LIBRARIES}) #CUBLAS find_package(CUDA) @@ -28,7 +28,13 @@ endif() string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}") foreach(PROG blas overhead) - add_executable(${PROG}-bench ${PROG}.cpp) - target_link_libraries(${PROG}-bench ${BLAS_LIBS} ${OPENCL_LIBRARIES}) + if(CUDA_FOUND) + set(CUPROG ${CMAKE_CURRENT_SOURCE_DIR}/${PROG}.cu) + file(COPY ${PROG}.cpp ${CUPROG}) + cuda_add_executable(${PROG}-bench ${CUPROG}) + else() + add_executable(${PROG}-bench ${PROG}.cpp) + endif() + target_link_libraries(${PROG}-bench ${BLAS_LIBS}) set_target_properties(${PROG}-bench PROPERTIES COMPILE_FLAGS "-Wall -Wextra ${BLAS_DEF_STR}") endforeach(PROG) diff --git a/bench/blas.cpp b/bench/blas.cpp index e4127113f..de7585cb5 100644 --- a/bench/blas.cpp +++ b/bench/blas.cpp @@ -1,9 +1,15 @@ #include "atidlas/array.h" #include "atidlas/tools/timer.hpp" -#include "clAmdBlas.h" #include "common.hpp" -#include "cblas.h" - +#ifdef BENCH_CLAMDBLAS + #include "clAmdBlas.h" +#endif +#ifdef BENCH_CBLAS + #include "cblas.h" +#endif +#ifdef BENCH_CUBLAS + #include +#endif #include #include #include @@ -12,6 +18,7 @@ namespace ad = atidlas; typedef atidlas::int_t int_t; +template void bench(ad::numeric_type dtype) { unsigned int dtsize = ad::size_of(dtype); @@ -57,6 +64,15 @@ void bench(ad::numeric_type dtype) atidlas::copy(x, cx); atidlas::copy(y, cy); BENCHMARK(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), bandwidth(3*N, tres, dtsize)); +#endif + /* CuBLAS */ +#ifdef BENCH_CUBLAS + T *cux, *cuy; + cudaMalloc((void**) &cux, N * sizeof(T)); + cudaMalloc((void**) &cuy, N * sizeof(T)); + BENCHMARK(cublasSaxpy(N, 2, x, 1, y, 1), bandwidth(3*N, tres, dtsize)) + cudaFree(cux); + cudaFree(cuy); #endif std::cout << std::endl; } @@ -175,7 +191,7 @@ int main(int argc, char* argv[]) atidlas::cl::default_context_idx = device_idx; std::cout << "#Benchmark : BLAS" << std::endl; std::cout << "#----------------" << std::endl; - bench(ad::FLOAT_TYPE); + bench(ad::FLOAT_TYPE); #ifdef BENCH_CLAMDBLAS clAmdBlasTeardown();