From 7fc23489243b14b38ea4a4b7789db5fe376b95f0 Mon Sep 17 00:00:00 2001 From: Philippe Date: Thu, 5 Feb 2015 23:42:31 -0500 Subject: [PATCH] Fixed CUDA benchmark --- bench/CMakeLists.txt | 2 +- bench/blas.cpp | 20 ++++++++++---------- cmake_modules/FindOpenCL.cmake | 5 ++--- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt index 950089f64..5b75b011d 100644 --- a/bench/CMakeLists.txt +++ b/bench/CMakeLists.txt @@ -37,7 +37,7 @@ string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}") foreach(PROG blas overhead) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) if(CUDA_FOUND) - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} " ${BLAS_DEF_STR}") + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} " ${BLAS_DEF_STR} -std=c++11") set(CUPROG ${CMAKE_CURRENT_BINARY_DIR}/${PROG}.cu) file(COPY ${PROG}.cpp DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${PROG}.cpp ${CUPROG}) diff --git a/bench/blas.cpp b/bench/blas.cpp index 112bf2bc0..970d770e8 100644 --- a/bench/blas.cpp +++ b/bench/blas.cpp @@ -53,20 +53,20 @@ void bench(ad::numeric_type dtype) {\ std::vector times;\ double total_time = 0;\ - double time;\ + float time;\ + cudaEvent_t start, stop;\ + cudaEventCreate(&start);\ + cudaEventCreate(&stop);\ while(total_time*1e-3 < 1e-1){\ - cudaEvent_t start, stop;\ - cudaEventCreate(&start);\ - cudaEventCreate(&stop);\ - cudaEventRecord(start);\ + cudaEventRecord(start,0);\ OP;\ - cudaEventRecord(stop);\ - cudaEventSynchronize();\ + cudaEventRecord(stop,0);\ + cudaEventSynchronize(stop);\ cudaEventElapsedTime(&time, start, stop);\ - times.push_back(time);\ + times.push_back(time*1e6);\ total_time+=time;\ }\ - double t = 1e-6*median(times);\ + double t = median(times);\ std::cout << " " << PERF << std::flush;\ } @@ -98,7 +98,7 @@ void bench(ad::numeric_type dtype) T *cux, *cuy; cudaMalloc((void**) &cux, N * sizeof(T)); cudaMalloc((void**) &cuy, N * sizeof(T)); - BENCHMARK(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t) + BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t) cudaFree(cux); cudaFree(cuy); #endif diff --git a/cmake_modules/FindOpenCL.cmake b/cmake_modules/FindOpenCL.cmake index 88b5c0874..10cf9b5c1 100644 --- a/cmake_modules/FindOpenCL.cmake +++ b/cmake_modules/FindOpenCL.cmake @@ -1,9 +1,8 @@ file(GLOB AMDAPPSDK_ROOT /opt/AMDAPPSDK*) find_package(CUDA QUIET) -find_path(OPENCL_INCLUDE_DIR CL/cl.hpp HINTS ${AMDAPPSDK_ROOT}/include/ ${CUDA_SDK_ROOT_DIR}/include) -find_library(OPENCL_LIBRARIES NAMES OpenCL HINTS ${AMDAPPSDK_ROOT}/lib/x86_64/ ${CUDA_SDK_ROOT_DIR}/lib64) +find_library(OPENCL_LIBRARIES NAMES OpenCL HINTS ${AMDAPPSDK_ROOT}/lib/x86_64/ ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/) include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIR) +find_package_handle_standard_args(OpenCL DEFAULT_MSG OPENCL_LIBRARIES) mark_as_advanced(OpenCL)