Fixed CUDA benchmark

This commit is contained in:
Philippe
2015-02-05 23:42:31 -05:00
parent 58fdc5d18e
commit 7fc2348924
3 changed files with 13 additions and 14 deletions

View File

@@ -37,7 +37,7 @@ string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}")
foreach(PROG blas overhead) foreach(PROG blas overhead)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR})
if(CUDA_FOUND) if(CUDA_FOUND)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} " ${BLAS_DEF_STR}") set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} " ${BLAS_DEF_STR} -std=c++11")
set(CUPROG ${CMAKE_CURRENT_BINARY_DIR}/${PROG}.cu) set(CUPROG ${CMAKE_CURRENT_BINARY_DIR}/${PROG}.cu)
file(COPY ${PROG}.cpp DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) file(COPY ${PROG}.cpp DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${PROG}.cpp ${CUPROG}) file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${PROG}.cpp ${CUPROG})

View File

@@ -53,20 +53,20 @@ void bench(ad::numeric_type dtype)
{\ {\
std::vector<long> times;\ std::vector<long> times;\
double total_time = 0;\ double total_time = 0;\
double time;\ float time;\
while(total_time*1e-3 < 1e-1){\
cudaEvent_t start, stop;\ cudaEvent_t start, stop;\
cudaEventCreate(&start);\ cudaEventCreate(&start);\
cudaEventCreate(&stop);\ cudaEventCreate(&stop);\
cudaEventRecord(start);\ while(total_time*1e-3 < 1e-1){\
cudaEventRecord(start,0);\
OP;\ OP;\
cudaEventRecord(stop);\ cudaEventRecord(stop,0);\
cudaEventSynchronize();\ cudaEventSynchronize(stop);\
cudaEventElapsedTime(&time, start, stop);\ cudaEventElapsedTime(&time, start, stop);\
times.push_back(time);\ times.push_back(time*1e6);\
total_time+=time;\ total_time+=time;\
}\ }\
double t = 1e-6*median(times);\ double t = median(times);\
std::cout << " " << PERF << std::flush;\ std::cout << " " << PERF << std::flush;\
} }
@@ -98,7 +98,7 @@ void bench(ad::numeric_type dtype)
T *cux, *cuy; T *cux, *cuy;
cudaMalloc((void**) &cux, N * sizeof(T)); cudaMalloc((void**) &cux, N * sizeof(T));
cudaMalloc((void**) &cuy, N * sizeof(T)); cudaMalloc((void**) &cuy, N * sizeof(T));
BENCHMARK(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t) BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
cudaFree(cux); cudaFree(cux);
cudaFree(cuy); cudaFree(cuy);
#endif #endif

View File

@@ -1,9 +1,8 @@
file(GLOB AMDAPPSDK_ROOT /opt/AMDAPPSDK*) file(GLOB AMDAPPSDK_ROOT /opt/AMDAPPSDK*)
find_package(CUDA QUIET) find_package(CUDA QUIET)
find_path(OPENCL_INCLUDE_DIR CL/cl.hpp HINTS ${AMDAPPSDK_ROOT}/include/ ${CUDA_SDK_ROOT_DIR}/include) find_library(OPENCL_LIBRARIES NAMES OpenCL HINTS ${AMDAPPSDK_ROOT}/lib/x86_64/ ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/)
find_library(OPENCL_LIBRARIES NAMES OpenCL HINTS ${AMDAPPSDK_ROOT}/lib/x86_64/ ${CUDA_SDK_ROOT_DIR}/lib64)
include(FindPackageHandleStandardArgs) include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIR) find_package_handle_standard_args(OpenCL DEFAULT_MSG OPENCL_LIBRARIES)
mark_as_advanced(OpenCL) mark_as_advanced(OpenCL)