Bench: now using host timer rather than events

This commit is contained in:
Philippe Tillet
2015-12-17 01:37:42 -05:00
parent 722dae528c
commit f99293816c
2 changed files with 37 additions and 31 deletions

View File

@@ -21,16 +21,12 @@
namespace sc = isaac; namespace sc = isaac;
typedef sc::int_t int_t; typedef sc::int_t int_t;
static long time_event(long sum, sc::driver::Event const & e)
{
return sum + e.elapsed_time();
}
template<class T> template<class T>
void bench(sc::numeric_type dtype, std::string operation) void bench(sc::numeric_type dtype, std::string operation)
{ {
Timer tmr;
// //
// MACROS FOR BENCHMARKING // MACROS FOR BENCHMARKING
// //
@@ -39,16 +35,18 @@ void bench(sc::numeric_type dtype, std::string operation)
#define BENCHMARK_ISAAC(OP, PERF) \ #define BENCHMARK_ISAAC(OP, PERF) \
{\ {\
std::vector<double> times;\ std::vector<long> times;\
double total_time = 0;\ double total_time = 0;\
while(total_time*1e-9 < 1e-2){\
std::list<sc::driver::Event> events;\
OP;\ OP;\
queue.synchronize();\ queue.synchronize();\
times.push_back((double)std::accumulate(events.begin(), events.end(), 0, &time_event));\ while(total_time*1e-9 < 1e-1){\
tmr.start();\
OP;\
queue.synchronize();\
times.push_back(tmr.get().count());\
total_time+=times.back();\ total_time+=times.back();\
}\ }\
double t = mean(times);\ double t = min(times);\
std::cout << " " << (int)(PERF) << std::flush;\ std::cout << " " << (int)(PERF) << std::flush;\
} }
@@ -56,30 +54,32 @@ void bench(sc::numeric_type dtype, std::string operation)
{\ {\
std::vector<long> times;\ std::vector<long> times;\
double total_time = 0;\ double total_time = 0;\
while(total_time*1e-9 < 1e-2){\
cl_event event;\
OP;\ OP;\
queue.synchronize();\ queue.synchronize();\
times.push_back(sc::driver::Event(event).elapsed_time());\ while(total_time*1e-9 < 1e-1){\
tmr.start();\
OP;\
queue.synchronize();\
times.push_back(tmr.get().count());\
total_time+=times.back();\ total_time+=times.back();\
}\ }\
double t = mean(times);\ double t = min(times);\
std::cout << " " << (int)(PERF) << std::flush;\ std::cout << " " << (int)(PERF) << std::flush;\
} }
#define BENCHMARK_HOST(OP, PERF) \ #define BENCHMARK_HOST(OP, PERF) \
{\ {\
Timer tmr;\
long total_time = 0;\ long total_time = 0;\
std::vector<long> times;\ std::vector<long> times;\
while(total_time*1e-9 < 1e-2){\ OP;\
while(total_time*1e-9 < 1e-1){\
tmr.start();\ tmr.start();\
OP;\ OP;\
long time = tmr.get().count();\ long time = tmr.get().count();\
times.push_back(time);\ times.push_back(time);\
total_time += time;\ total_time += time;\
}\ }\
double t = mean(times);\ double t = min(times);\
std::cout << " " << (int)(PERF) << std::flush;\ std::cout << " " << (int)(PERF) << std::flush;\
} }
@@ -91,7 +91,7 @@ void bench(sc::numeric_type dtype, std::string operation)
cudaEvent_t start, stop;\ cudaEvent_t start, stop;\
cudaEventCreate(&start);\ cudaEventCreate(&start);\
cudaEventCreate(&stop);\ cudaEventCreate(&stop);\
while(total_time*1e-3 < 1e-2){\ while(total_time*1e-3 < 1e-1){\
cudaEventRecord(start,0);\ cudaEventRecord(start,0);\
OP;\ OP;\
cudaEventRecord(stop,0);\ cudaEventRecord(stop,0);\
@@ -100,7 +100,7 @@ void bench(sc::numeric_type dtype, std::string operation)
times.push_back(time*1e6);\ times.push_back(time*1e6);\
total_time+=time;\ total_time+=time;\
}\ }\
double t = mean(times);\ double t = min(times);\
std::cout << " " << (int)(PERF) << std::flush;\ std::cout << " " << (int)(PERF) << std::flush;\
} }
@@ -138,13 +138,12 @@ void bench(sc::numeric_type dtype, std::string operation)
std::cout << N; std::cout << N;
sc::array x(N, dtype), y(N, dtype); sc::array x(N, dtype), y(N, dtype);
/* ISAAC */ /* ISAAC */
std::list<sc::driver::Event> events; BENCHMARK_ISAAC(y = sc::execution_handler(x + alpha*y), 3*N*dtsize/t)
BENCHMARK_ISAAC(y = sc::execution_handler(x + alpha*y, sc::execution_options_type(0, &events)), 3*N*dtsize/t) BENCHMARK_ISAAC(y = sc::execution_handler(x + alpha*y, sc::execution_options_type(), sc::dispatcher_options_type(true)), 3*N*dtsize/t)
BENCHMARK_ISAAC(y = sc::execution_handler(x + alpha*y, sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)), 3*N*dtsize/t)
/* clblas */ /* clblas */
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
if(x.context().backend()==sc::driver::OPENCL) if(x.context().backend()==sc::driver::OPENCL)
BENCHMARK_CLBLAS(clblasSaxpy(N, alpha, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue), 0, NULL, &event), 3*N*dtsize/t); BENCHMARK_CLBLAS(clblasSaxpy(N, alpha, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue), 0, NULL, NULL), 3*N*dtsize/t);
#endif #endif
/* BLAS */ /* BLAS */
#ifdef BENCH_CBLAS #ifdef BENCH_CBLAS
@@ -171,11 +170,11 @@ void bench(sc::numeric_type dtype, std::string operation)
sc::array scratch(N, dtype); sc::array scratch(N, dtype);
sc::scalar s(dtype); sc::scalar s(dtype);
s = dot(x,y); queue.synchronize(); s = dot(x,y); queue.synchronize();
BENCHMARK_ISAAC(s = sc::execution_handler(dot(x,y), sc::execution_options_type(0, &events)), 2*N*dtsize/t) BENCHMARK_ISAAC(s = sc::execution_handler(dot(x,y)), 2*N*dtsize/t)
/* clblas */ /* clblas */
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
if(x.context().backend()==sc::driver::OPENCL) if(x.context().backend()==sc::driver::OPENCL)
BENCHMARK_CLBLAS(clblasSdot(N, CL_HANDLE(s.data()), 0, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, CL_HANDLE(scratch.data()), 1, &CL_HANDLE(queue), 0, NULL, &event), 2*N*dtsize/t) BENCHMARK_CLBLAS(clblasSdot(N, CL_HANDLE(s.data()), 0, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, CL_HANDLE(scratch.data()), 1, &CL_HANDLE(queue), 0, NULL, NULL), 2*N*dtsize/t)
#endif #endif
/* BLAS */ /* BLAS */
#ifdef BENCH_CBLAS #ifdef BENCH_CBLAS
@@ -234,11 +233,11 @@ void bench(sc::numeric_type dtype, std::string operation)
#ifdef HAS_A_BLAS #ifdef HAS_A_BLAS
int_t lda = A.stride()[1]; int_t lda = A.stride()[1];
#endif #endif
BENCHMARK_ISAAC(y = sc::execution_handler(AT?dot(A.T,x):dot(A,x), sc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t); BENCHMARK_ISAAC(y = sc::execution_handler(AT?dot(A.T,x):dot(A,x)),(M*N + M + N)*dtsize/t);
BENCHMARK_ISAAC(y = sc::execution_handler(AT?dot(A.T,x):dot(A,x), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)),(M*N + M + N)*dtsize/t); BENCHMARK_ISAAC(y = sc::execution_handler(AT?dot(A.T,x):dot(A,x), sc::execution_options_type(), sc::dispatcher_options_type(true)),(M*N + M + N)*dtsize/t);
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
if(y.context().backend()==sc::driver::OPENCL) if(y.context().backend()==sc::driver::OPENCL)
BENCHMARK_CLBLAS(clblasSgemv(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, As1, As2, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(x.data()), 0, 1, 0, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue),0, NULL, &event), (M*N + M + N)*dtsize/t) BENCHMARK_CLBLAS(clblasSgemv(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, As1, As2, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(x.data()), 0, 1, 0, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue),0, NULL, NULL), (M*N + M + N)*dtsize/t)
#endif #endif
#ifdef BENCH_CBLAS #ifdef BENCH_CBLAS
std::vector<float> cA(M*N), cx(N), cy(M); std::vector<float> cA(M*N), cx(N), cy(M);
@@ -313,13 +312,13 @@ void bench(sc::numeric_type dtype, std::string operation)
#ifdef HAS_A_BLAS #ifdef HAS_A_BLAS
int_t lda = A.stride()[1], ldb = B.stride()[1], ldc = C.stride()[1]; int_t lda = A.stride()[1], ldb = B.stride()[1], ldc = C.stride()[1];
#endif #endif
BENCHMARK_ISAAC(C = sc::execution_handler(AT?(BT?dot(A.T,B.T):dot(A.T,B)):(BT?dot(A,B.T):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(false)), (double)2*M*N*K/t); BENCHMARK_ISAAC(C = sc::execution_handler(AT?(BT?dot(A.T,B.T):dot(A.T,B)):(BT?dot(A,B.T):dot(A,B))), (double)2*M*N*K/t);
// BENCHMARK_ISAAC(C = sc::execution_handler(AT?(BT?dot(A.T,B.T):dot(A.T,B)):(BT?dot(A,B.T):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)), (double)2*M*N*K/t); // BENCHMARK_ISAAC(C = sc::execution_handler(AT?(BT?dot(A.T,B.T):dot(A.T,B)):(BT?dot(A,B.T):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)), (double)2*M*N*K/t);
/* clblas */ /* clblas */
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
if(C.context().backend()==sc::driver::OPENCL) if(C.context().backend()==sc::driver::OPENCL)
BENCHMARK_CLBLAS(clblasSgemm(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, BT?clblasTrans:clblasNoTrans, M, N, K, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(B.data()), 0, ldb, BENCHMARK_CLBLAS(clblasSgemm(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, BT?clblasTrans:clblasNoTrans, M, N, K, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(B.data()), 0, ldb,
0, CL_HANDLE(C.data()), 0, ldc, 1, &CL_HANDLE(queue),0, NULL, &event), (double)2*M*N*K/t) 0, CL_HANDLE(C.data()), 0, ldc, 1, &CL_HANDLE(queue),0, NULL, NULL), (double)2*M*N*K/t)
#endif #endif
/* BLAS */ /* BLAS */
#ifdef BENCH_CBLAS #ifdef BENCH_CBLAS

View File

@@ -2,6 +2,7 @@
#define ISAAC_BENCH_COMMON_HPP_ #define ISAAC_BENCH_COMMON_HPP_
#include <chrono> #include <chrono>
#include <algorithm>
template<std::size_t> struct int_{}; template<std::size_t> struct int_{};
@@ -61,6 +62,12 @@ T median(std::vector<T> x)
return x[size / 2]; return x[size / 2];
} }
template<class T>
T min(std::vector<T> x)
{
return *std::min_element(x.begin(), x.end());
}
template<class T> template<class T>
T mean(std::vector<T> x) T mean(std::vector<T> x)
{ {