Fixed CUDA benchmark
This commit is contained in:
@@ -37,7 +37,7 @@ string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}")
|
|||||||
foreach(PROG blas overhead)
|
foreach(PROG blas overhead)
|
||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
if(CUDA_FOUND)
|
if(CUDA_FOUND)
|
||||||
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} " ${BLAS_DEF_STR}")
|
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} " ${BLAS_DEF_STR} -std=c++11")
|
||||||
set(CUPROG ${CMAKE_CURRENT_BINARY_DIR}/${PROG}.cu)
|
set(CUPROG ${CMAKE_CURRENT_BINARY_DIR}/${PROG}.cu)
|
||||||
file(COPY ${PROG}.cpp DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
|
file(COPY ${PROG}.cpp DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${PROG}.cpp ${CUPROG})
|
file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${PROG}.cpp ${CUPROG})
|
||||||
|
@@ -53,20 +53,20 @@ void bench(ad::numeric_type dtype)
|
|||||||
{\
|
{\
|
||||||
std::vector<long> times;\
|
std::vector<long> times;\
|
||||||
double total_time = 0;\
|
double total_time = 0;\
|
||||||
double time;\
|
float time;\
|
||||||
while(total_time*1e-3 < 1e-1){\
|
|
||||||
cudaEvent_t start, stop;\
|
cudaEvent_t start, stop;\
|
||||||
cudaEventCreate(&start);\
|
cudaEventCreate(&start);\
|
||||||
cudaEventCreate(&stop);\
|
cudaEventCreate(&stop);\
|
||||||
cudaEventRecord(start);\
|
while(total_time*1e-3 < 1e-1){\
|
||||||
|
cudaEventRecord(start,0);\
|
||||||
OP;\
|
OP;\
|
||||||
cudaEventRecord(stop);\
|
cudaEventRecord(stop,0);\
|
||||||
cudaEventSynchronize();\
|
cudaEventSynchronize(stop);\
|
||||||
cudaEventElapsedTime(&time, start, stop);\
|
cudaEventElapsedTime(&time, start, stop);\
|
||||||
times.push_back(time);\
|
times.push_back(time*1e6);\
|
||||||
total_time+=time;\
|
total_time+=time;\
|
||||||
}\
|
}\
|
||||||
double t = 1e-6*median(times);\
|
double t = median(times);\
|
||||||
std::cout << " " << PERF << std::flush;\
|
std::cout << " " << PERF << std::flush;\
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -98,7 +98,7 @@ void bench(ad::numeric_type dtype)
|
|||||||
T *cux, *cuy;
|
T *cux, *cuy;
|
||||||
cudaMalloc((void**) &cux, N * sizeof(T));
|
cudaMalloc((void**) &cux, N * sizeof(T));
|
||||||
cudaMalloc((void**) &cuy, N * sizeof(T));
|
cudaMalloc((void**) &cuy, N * sizeof(T));
|
||||||
BENCHMARK(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
|
BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
|
||||||
cudaFree(cux);
|
cudaFree(cux);
|
||||||
cudaFree(cuy);
|
cudaFree(cuy);
|
||||||
#endif
|
#endif
|
||||||
|
@@ -1,9 +1,8 @@
|
|||||||
file(GLOB AMDAPPSDK_ROOT /opt/AMDAPPSDK*)
|
file(GLOB AMDAPPSDK_ROOT /opt/AMDAPPSDK*)
|
||||||
|
|
||||||
find_package(CUDA QUIET)
|
find_package(CUDA QUIET)
|
||||||
find_path(OPENCL_INCLUDE_DIR CL/cl.hpp HINTS ${AMDAPPSDK_ROOT}/include/ ${CUDA_SDK_ROOT_DIR}/include)
|
find_library(OPENCL_LIBRARIES NAMES OpenCL HINTS ${AMDAPPSDK_ROOT}/lib/x86_64/ ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/)
|
||||||
find_library(OPENCL_LIBRARIES NAMES OpenCL HINTS ${AMDAPPSDK_ROOT}/lib/x86_64/ ${CUDA_SDK_ROOT_DIR}/lib64)
|
|
||||||
|
|
||||||
include(FindPackageHandleStandardArgs)
|
include(FindPackageHandleStandardArgs)
|
||||||
find_package_handle_standard_args(OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIR)
|
find_package_handle_standard_args(OpenCL DEFAULT_MSG OPENCL_LIBRARIES)
|
||||||
mark_as_advanced(OpenCL)
|
mark_as_advanced(OpenCL)
|
||||||
|
Reference in New Issue
Block a user