tentative cuda benchmark integration

x
This commit is contained in:
Philippe Tillet
2015-01-27 15:32:59 -05:00
parent be006268d7
commit c12ec4cebd
2 changed files with 29 additions and 7 deletions

View File

@@ -1,7 +1,7 @@
set(CMAKE_BUILD_TYPE Release) set(CMAKE_BUILD_TYPE Release)
set(BLAS_DEF) set(BLAS_DEF)
set(BLAS_LIBS atidlas) set(BLAS_LIBS atidlas ${OPENCL_LIBRARIES})
#CUBLAS #CUBLAS
find_package(CUDA) find_package(CUDA)
@@ -28,7 +28,13 @@ endif()
string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}") string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}")
foreach(PROG blas overhead) foreach(PROG blas overhead)
if(CUDA_FOUND)
# When CUDA is available the benchmark is built through cuda_add_executable,
# which is given a .cu source here, so a .cu twin of ${PROG}.cpp is generated
# next to it.
set(CUPROG ${CMAKE_CURRENT_SOURCE_DIR}/${PROG}.cu)
# file(COPY ...) requires a DESTINATION directory and cannot rename its input;
# configure_file(... COPYONLY) copies verbatim to the exact path and re-copies
# whenever ${PROG}.cpp changes.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${PROG}.cpp ${CUPROG} COPYONLY)
cuda_add_executable(${PROG}-bench ${CUPROG})
else()
add_executable(${PROG}-bench ${PROG}.cpp) add_executable(${PROG}-bench ${PROG}.cpp)
target_link_libraries(${PROG}-bench ${BLAS_LIBS} ${OPENCL_LIBRARIES}) endif()
target_link_libraries(${PROG}-bench ${BLAS_LIBS})
set_target_properties(${PROG}-bench PROPERTIES COMPILE_FLAGS "-Wall -Wextra ${BLAS_DEF_STR}") set_target_properties(${PROG}-bench PROPERTIES COMPILE_FLAGS "-Wall -Wextra ${BLAS_DEF_STR}")
endforeach(PROG) endforeach(PROG)

View File

@@ -1,9 +1,15 @@
#include "atidlas/array.h" #include "atidlas/array.h"
#include "atidlas/tools/timer.hpp" #include "atidlas/tools/timer.hpp"
#include "clAmdBlas.h"
#include "common.hpp" #include "common.hpp"
#include "cblas.h" #ifdef BENCH_CLAMDBLAS
#include "clAmdBlas.h"
#endif
#ifdef BENCH_CBLAS
#include "cblas.h"
#endif
#ifdef BENCH_CUBLAS
#include <cublas.h>
#endif
#include <iomanip> #include <iomanip>
#include <stdlib.h> #include <stdlib.h>
#include <cmath> #include <cmath>
@@ -12,6 +18,7 @@
namespace ad = atidlas; namespace ad = atidlas;
typedef atidlas::int_t int_t; typedef atidlas::int_t int_t;
template<class T>
void bench(ad::numeric_type dtype) void bench(ad::numeric_type dtype)
{ {
unsigned int dtsize = ad::size_of(dtype); unsigned int dtsize = ad::size_of(dtype);
@@ -57,6 +64,15 @@ void bench(ad::numeric_type dtype)
atidlas::copy(x, cx); atidlas::copy(x, cx);
atidlas::copy(y, cy); atidlas::copy(y, cy);
BENCHMARK(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), bandwidth(3*N, tres, dtsize)); BENCHMARK(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), bandwidth(3*N, tres, dtsize));
#endif
/* CuBLAS */
#ifdef BENCH_CUBLAS
// Benchmarks cublasSaxpy on two scratch device buffers of N elements each.
// NOTE(review): cublasSaxpy is the single-precision routine, so this section
// is only meaningful when T is float (the only instantiation visible in main).
// NOTE(review): the legacy <cublas.h> API expects cublasInit() to have been
// called beforehand; that call is not visible in this section -- confirm.
// NOTE(review): confirm that BENCHMARK waits for the device to finish before
// reading the timer, otherwise only the launch cost is measured.
T *cux = NULL, *cuy = NULL;
if(cudaMalloc((void**) &cux, N * sizeof(T)) == cudaSuccess
   && cudaMalloc((void**) &cuy, N * sizeof(T)) == cudaSuccess)
{
  // Give the operands defined contents; the values do not affect the traffic.
  cudaMemset(cux, 0, N * sizeof(T));
  cudaMemset(cuy, 0, N * sizeof(T));
  // Pass the device buffers allocated above (previously x and y were passed
  // and cux/cuy were never used).
  BENCHMARK(cublasSaxpy(N, 2, cux, 1, cuy, 1), bandwidth(3*N, tres, dtsize));
}
else
  std::cerr << "cuBLAS benchmark skipped: cudaMalloc failed" << std::endl;
// cudaFree accepts a null pointer, so this is safe on the failure path too.
cudaFree(cux);
cudaFree(cuy);
#endif #endif
std::cout << std::endl; std::cout << std::endl;
} }
@@ -175,7 +191,7 @@ int main(int argc, char* argv[])
atidlas::cl::default_context_idx = device_idx; atidlas::cl::default_context_idx = device_idx;
std::cout << "#Benchmark : BLAS" << std::endl; std::cout << "#Benchmark : BLAS" << std::endl;
std::cout << "#----------------" << std::endl; std::cout << "#----------------" << std::endl;
bench(ad::FLOAT_TYPE); bench<float>(ad::FLOAT_TYPE);
#ifdef BENCH_CLAMDBLAS #ifdef BENCH_CLAMDBLAS
clAmdBlasTeardown(); clAmdBlasTeardown();