Bench: Now displaying results in a table
This commit is contained in:
@@ -19,20 +19,20 @@ if(CLBLAS_FOUND)
|
||||
set(BLAS_LIBS ${BLAS_LIBS} ${CLBLAS_LIBRARIES} )
|
||||
endif()
|
||||
|
||||
#CBLAS
|
||||
find_package(MKL QUIET)
|
||||
if(MKL_FOUND)
|
||||
set(BLAS_DEF ${BLAS_DEF} "-DHAS_A_BLAS -DBENCH_MKL -DBENCH_CBLAS")
|
||||
include_directories(${MKL_INCLUDE_DIR})
|
||||
set(BLAS_LIBS ${BLAS_LIBS} ${MKL_LIBRARIES} )
|
||||
else()
|
||||
find_package(OpenBlas)
|
||||
if(OPENBLAS_FOUND)
|
||||
set(BLAS_DEF ${BLAS_DEF} "-DHAS_A_BLAS -DBENCH_CBLAS")
|
||||
include_directories(${OPENBLAS_INCLUDE_DIR})
|
||||
set(BLAS_LIBS ${BLAS_LIBS} ${OPENBLAS_LIBRARIES} )
|
||||
endif()
|
||||
endif()
|
||||
##CBLAS
|
||||
#find_package(MKL QUIET)
|
||||
#if(MKL_FOUND)
|
||||
# set(BLAS_DEF ${BLAS_DEF} "-DHAS_A_BLAS -DBENCH_MKL")
|
||||
# include_directories(${MKL_INCLUDE_DIR})
|
||||
# set(BLAS_LIBS ${BLAS_LIBS} ${MKL_LIBRARIES} )
|
||||
#else()
|
||||
# find_package(OpenBlas)
|
||||
# if(OPENBLAS_FOUND)
|
||||
# set(BLAS_DEF ${BLAS_DEF} "-DHAS_A_BLAS -DBENCH_CBLAS")
|
||||
# include_directories(${OPENBLAS_INCLUDE_DIR})
|
||||
# set(BLAS_LIBS ${BLAS_LIBS} ${OPENBLAS_LIBRARIES} )
|
||||
# endif()
|
||||
#endif()
|
||||
|
||||
string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}")
|
||||
|
||||
|
110
bench/blas.cpp
110
bench/blas.cpp
@@ -49,7 +49,7 @@ void bench(sc::numeric_type dtype, std::string operation)
|
||||
total_time+=times.back();\
|
||||
}\
|
||||
double t = min(times);\
|
||||
std::cout << " " << (int)(PERF) << std::flush;\
|
||||
/* Print the metric as-is, matching the other benchmark macros in this file. The previous unparenthesised (int)PERF cast bound only to the first token of the expanded argument. */\
std::cout << "\t" << (PERF) << std::flush;\
|
||||
}
|
||||
|
||||
#define BENCHMARK_CLBLAS(OP, PERF) \
|
||||
@@ -66,7 +66,7 @@ void bench(sc::numeric_type dtype, std::string operation)
|
||||
total_time+=times.back();\
|
||||
}\
|
||||
double t = min(times);\
|
||||
std::cout << " " << (int)(PERF) << std::flush;\
|
||||
std::cout << "\t" << PERF << std::flush;\
|
||||
}
|
||||
|
||||
#define BENCHMARK_HOST(OP, PERF) \
|
||||
@@ -82,7 +82,7 @@ void bench(sc::numeric_type dtype, std::string operation)
|
||||
total_time += time;\
|
||||
}\
|
||||
double t = min(times);\
|
||||
std::cout << " " << (int)(PERF) << std::flush;\
|
||||
std::cout << "\t" << PERF << std::flush;\
|
||||
}
|
||||
|
||||
#define BENCHMARK_CUDA(OP, PERF) \
|
||||
@@ -99,27 +99,14 @@ void bench(sc::numeric_type dtype, std::string operation)
|
||||
total_time+=times.back();\
|
||||
}\
|
||||
double t = min(times);\
|
||||
std::cout << " " << (int)(PERF) << std::flush;\
|
||||
std::cout << "\t" << PERF << std::flush;\
|
||||
}
|
||||
|
||||
unsigned int dtsize = sc::size_of(dtype);
|
||||
sc::driver::CommandQueue & queue = sc::driver::backend::queues::get(sc::driver::backend::contexts::get_default(),0);
|
||||
std::map<std::string, std::string> metric{ {"axpy", "GB/s"}, {"dot", "GB/s"}, {"gemv", "GB/s"}, {"gemm", "GFLOPS"}};
|
||||
sc::array flush((int)1e6, sc::FLOAT_TYPE);
|
||||
std::cout << "#" << operation << " (" << metric[operation] << ")" << std::endl;
|
||||
std::cout << "\"N\"";
|
||||
std::cout << " \"ISAAC\"";
|
||||
// std::cout << " \"ISAAC (Best impl.)\"";
|
||||
#ifdef BENCH_CLBLAS
|
||||
std::cout << " \"clBLAS\"";
|
||||
#endif
|
||||
#ifdef BENCH_CBLAS
|
||||
std::cout << " \"BLAS\"";
|
||||
#endif
|
||||
#ifdef BENCH_CUBLAS
|
||||
std::cout << " \"cuBLAS\"";
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
|
||||
//
|
||||
// RUN BENCHMARKS
|
||||
//
|
||||
@@ -250,53 +237,51 @@ void bench(sc::numeric_type dtype, std::string operation)
|
||||
|
||||
if(operation.substr(0,4)=="gemm")
|
||||
{
|
||||
std::vector<std::tuple<std::string, char, char, int_t, int_t, int_t> > MNKs;
|
||||
std::vector<std::tuple<std::string, int_t, int_t, int_t, char, char> > MNKs;
|
||||
//Square
|
||||
MNKs.push_back(std::make_tuple("square896",'N','T',896,896,896));
|
||||
MNKs.push_back(std::make_tuple("square2560",'N','T',2560,2560,2560));
|
||||
|
||||
//Convolution
|
||||
MNKs.push_back(std::make_tuple("conv1",'N','N',3025,64,363));
|
||||
MNKs.push_back(std::make_tuple("conv2",'N','N',729,192,1600));
|
||||
MNKs.push_back(std::make_tuple("conv3",'N','N',169,384,1728));
|
||||
MNKs.push_back(std::make_tuple("conv4",'N','N',169,256,3456));
|
||||
MNKs.push_back(std::make_tuple("conv5",'N','N',169,128,2304));
|
||||
|
||||
// //Convolution Gradient-1
|
||||
// MNKs.push_back(std::make_tuple("convgrad5-1]",'T','N',2304,256,169));
|
||||
// MNKs.push_back(std::make_tuple("convgrad4-1]",'T','N',3456,256,169));
|
||||
// MNKs.push_back(std::make_tuple("convgrad3-1]",'T','N',1728,384,169));
|
||||
// MNKs.push_back(std::make_tuple("convgrad2-1]",'T','N',1600,192,729));
|
||||
// MNKs.push_back(std::make_tuple("convgrad1-1]",'T','N',363,64,3025));
|
||||
|
||||
// //Convolution Gradient-2
|
||||
// MNKs.push_back(std::make_tuple("convgrad5-2]",'N','T',169,2304,256));
|
||||
// MNKs.push_back(std::make_tuple("convgrad4-2]",'N','T',169,3456,256));
|
||||
// MNKs.push_back(std::make_tuple("convgrad3-2]",'N','T',169,1728,384));
|
||||
// MNKs.push_back(std::make_tuple("convgrad2-2]",'N','T',729,1600,192));
|
||||
// MNKs.push_back(std::make_tuple("convgrad1-2]",'N','T',3025,363,64));
|
||||
|
||||
MNKs.push_back(std::make_tuple("Square",896,896,896,'N','T'));
|
||||
MNKs.push_back(std::make_tuple("Square",2560,2560,2560,'N','T'));
|
||||
//DeepBench
|
||||
for(size_t MK: std::vector<size_t>{1760, 2048, 2560})
|
||||
for(size_t N: std::vector<size_t>{16, 32, 64, 128, MK})
|
||||
MNKs.push_back(std::make_tuple("Deep", MK, N, MK, 'N', 'N'));
|
||||
for(size_t MK: std::vector<size_t>{1760, 2048, 2560})
|
||||
for(size_t N: std::vector<size_t>{16, 32, 64, 128, MK})
|
||||
MNKs.push_back(std::make_tuple("Deep", MK, N, MK, 'T', 'N'));
|
||||
for(size_t MK: std::vector<size_t>{1760})
|
||||
MNKs.push_back(std::make_tuple("Deep", MK, 1733, MK, 'N', 'T'));
|
||||
//Covariance (e.g., ICA, 10minutes/100Hz)
|
||||
MNKs.push_back(std::make_tuple("ica32",'N','T',32,32,60000));
|
||||
MNKs.push_back(std::make_tuple("ica256",'N','T',256,256,60000));
|
||||
MNKs.push_back(std::make_tuple("Cov",32,32,60000,'N','T'));
|
||||
MNKs.push_back(std::make_tuple("Cov",256,256,60000,'N','T'));
|
||||
//Bi-diagonalization
|
||||
MNKs.push_back(std::make_tuple("Lapack",4096,4096,32,'N','T'));
|
||||
MNKs.push_back(std::make_tuple("Lapack",3456,3456,32,'N','T'));
|
||||
MNKs.push_back(std::make_tuple("Lapack",896,896,32,'N','T'));
|
||||
|
||||
// //Bi-diagonalization
|
||||
MNKs.push_back(std::make_tuple("32rank1-4096",'N','T',4096,4096,32));
|
||||
MNKs.push_back(std::make_tuple("32rank1-3456",'N','T',3456,3456,32));
|
||||
MNKs.push_back(std::make_tuple("32rank1-896",'N','T',896,896,32));
|
||||
// Header row of the tab-separated result table; library columns are appended below.
std::cout << "BENCH\tM\tN\tK\ta_t\tb_t\tISAAC";
|
||||
#ifdef BENCH_CLBLAS
|
||||
std::cout << "\tclBLAS";
|
||||
#endif
|
||||
#ifdef BENCH_CBLAS
|
||||
std::cout << "\tBLAS";
|
||||
#endif
|
||||
#ifdef BENCH_CUBLAS
|
||||
std::cout << "\tcuBLAS";
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
|
||||
/*---------*/
|
||||
/*--BLAS3--*/
|
||||
/*---------*/
|
||||
for(std::tuple<std::string, char, char, int_t, int_t, int_t> MNK: MNKs)
|
||||
for(auto MNK: MNKs)
|
||||
{
|
||||
bool AT = std::get<1>(MNK)=='T';
|
||||
bool BT = std::get<2>(MNK)=='T';
|
||||
int_t M = std::get<3>(MNK);
|
||||
int_t N = std::get<4>(MNK);
|
||||
int_t K = std::get<5>(MNK);
|
||||
std::cout << "\"" << std::get<0>(MNK) << "\"";
|
||||
std::cout << std::flush;
|
||||
bool AT = std::get<4>(MNK)=='T';
|
||||
bool BT = std::get<5>(MNK)=='T';
|
||||
int_t M = std::get<1>(MNK);
|
||||
int_t N = std::get<2>(MNK);
|
||||
int_t K = std::get<3>(MNK);
|
||||
std::cout << std::get<0>(MNK) << "\t" << M << "\t" << N
|
||||
<< "\t" << K << "\t" << std::get<4>(MNK) << "\t" << std::get<5>(MNK);
|
||||
/* ISAAC */
|
||||
int_t As1 = M, As2 = K;
|
||||
if(AT) std::swap(As1, As2);
|
||||
@@ -307,13 +292,13 @@ void bench(sc::numeric_type dtype, std::string operation)
|
||||
#ifdef HAS_A_BLAS
|
||||
int_t lda = A.stride()[1], ldb = B.stride()[1], ldc = C.stride()[1];
|
||||
#endif
|
||||
BENCHMARK_ISAAC(C = AT?(BT?dot(A.T,B.T):dot(A.T,B)):(BT?dot(A,B.T):dot(A,B)), (double)2*M*N*K/t);
|
||||
BENCHMARK_ISAAC(C = AT?(BT?dot(A.T,B.T):dot(A.T,B)):(BT?dot(A,B.T):dot(A,B)), (double)2*M*N*K/t*1e-3);
|
||||
// BENCHMARK_ISAAC(C = sc::execution_handler(AT?(BT?dot(A.T,B.T):dot(A.T,B)):(BT?dot(A,B.T):dot(A,B)), sc::execution_options_type(0), sc::dispatcher_options_type(true)), (double)2*M*N*K/t);
|
||||
/* clblas */
|
||||
#ifdef BENCH_CLBLAS
|
||||
if(C.context().backend()==sc::driver::OPENCL)
|
||||
BENCHMARK_CLBLAS(clblasSgemm(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, BT?clblasTrans:clblasNoTrans, M, N, K, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(B.data()), 0, ldb,
|
||||
0, CL_HANDLE(C.data()), 0, ldc, 1, &CL_HANDLE(queue),0, NULL, NULL), (double)2*M*N*K/t)
|
||||
0, CL_HANDLE(C.data()), 0, ldc, 1, &CL_HANDLE(queue),0, NULL, NULL), (double)2*M*N*K/t*1e-3)
|
||||
#endif
|
||||
/* BLAS */
|
||||
#ifdef BENCH_CBLAS
|
||||
@@ -321,10 +306,10 @@ void bench(sc::numeric_type dtype, std::string operation)
|
||||
sc::copy(C, cC);
|
||||
sc::copy(A, cA);
|
||||
sc::copy(B, cB);
|
||||
BENCHMARK_HOST(cblas_sgemm(CblasColMajor, AT?CblasTrans:CblasNoTrans, BT?CblasTrans:CblasNoTrans, M, N, K, 1, cA.data(), lda, cB.data(), ldb, 1, cC.data(), ldc), (double)2*M*N*K/t);
|
||||
BENCHMARK_HOST(cblas_sgemm(CblasColMajor, AT?CblasTrans:CblasNoTrans, BT?CblasTrans:CblasNoTrans, M, N, K, 1, cA.data(), lda, cB.data(), ldb, 1, cC.data(), ldc), (double)2*M*N*K/t*1e-3);
|
||||
#endif
|
||||
#ifdef BENCH_CUBLAS
|
||||
BENCHMARK_CUDA(cublasSgemm(AT?'t':'n', BT?'t':'n', M, N, K, 1, (T*)CU_HANDLE(A.data()), lda, (T*)CU_HANDLE(B.data()), ldb, 1, (T*)CU_HANDLE(C.data()), ldc), (double)2*M*N*K/t)
|
||||
BENCHMARK_CUDA(cublasSgemm(AT?'t':'n', BT?'t':'n', M, N, K, 1, (T*)CU_HANDLE(A.data()), lda, (T*)CU_HANDLE(B.data()), ldb, 1, (T*)CU_HANDLE(C.data()), ldc), (double)2*M*N*K/t*1e-3)
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
}
|
||||
@@ -373,8 +358,7 @@ int main(int argc, char* argv[])
|
||||
}
|
||||
|
||||
sc::driver::backend::default_device = device_idx;
|
||||
std::cout << "#Benchmark : BLAS" << std::endl;
|
||||
std::cout << "#----------------" << std::endl;
|
||||
std::cout << std::fixed << std::setprecision(2);
|
||||
bench<float>(sc::FLOAT_TYPE, operation);
|
||||
|
||||
#ifdef BENCH_CLBLAS
|
||||
|
@@ -74,6 +74,7 @@ public:
|
||||
SM_3_7,
|
||||
SM_5_0,
|
||||
SM_5_2,
|
||||
SM_6_1,
|
||||
|
||||
//AMD
|
||||
TERASCALE_2,
|
||||
|
@@ -81,6 +81,12 @@ Device::Architecture Device::architecture() const
|
||||
std::pair<unsigned int, unsigned int> sm = nv_compute_capability();
|
||||
switch(sm.first)
|
||||
{
|
||||
// Major version 6: only minor version 1 is mapped here (to SM_6_1).
case 6:
|
||||
switch(sm.second)
|
||||
{
|
||||
case 1: return Architecture::SM_6_1;
|
||||
}
// NOTE(review): no break or default follows this inner switch, so any other
// 6.x minor version continues into the `case 5` handling below — confirm
// that this fall-through is intended.
|
||||
|
||||
case 5:
|
||||
switch(sm.second)
|
||||
{
|
||||
|
@@ -33,6 +33,7 @@
|
||||
#include "database/nvidia/sm_3_0.hpp"
|
||||
#include "database/nvidia/sm_3_5.hpp"
|
||||
#include "database/nvidia/sm_5_2.hpp"
|
||||
#include "database/nvidia/sm_6_1.hpp"
|
||||
|
||||
//AMD
|
||||
#include "database/amd/gcn_1_1.hpp"
|
||||
@@ -60,6 +61,7 @@ const profiles::presets_type profiles::presets_ =
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_3_7, database::nvidia::sm_3_5),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_5_0, database::nvidia::sm_5_2),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_5_2, database::nvidia::sm_5_2),
|
||||
// SM_6_1 gets its own profile database; the key must not repeat SM_5_2.
DATABASE_ENTRY(GPU, NVIDIA, SM_6_1, database::nvidia::sm_6_1),
|
||||
//AMD
|
||||
DATABASE_ENTRY(GPU, AMD, GCN_1_0, database::amd::gcn_1_1),
|
||||
DATABASE_ENTRY(GPU, AMD, GCN_1_1, database::amd::gcn_1_1),
|
||||
|
@@ -73,7 +73,7 @@ def main():
|
||||
libraries += ['gnustl_shared']
|
||||
|
||||
#Source files
|
||||
src = 'src/lib/random/rand.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/object.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/matrix_product.cpp src/lib/jit/generation/base.cpp src/lib/runtime/execute.cpp src/lib/runtime/inference/database.cpp src/lib/runtime/inference/profiles.cpp src/lib/runtime/inference/predictors/random_forest.cpp src/lib/runtime/scheduler/dag.cpp src/lib/runtime/scheduler/strategies/heft.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/api/blas/clBLAS.cpp src/lib/api/blas/cublas.cpp src/lib/exception/api.cpp src/lib/exception/driver.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
src = 'src/lib/exception/api.cpp src/lib/exception/driver.cpp src/lib/value_scalar.cpp src/lib/random/rand.cpp src/lib/driver/check.cpp src/lib/driver/ndrange.cpp src/lib/driver/platform.cpp src/lib/driver/backend.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/event.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/device.cpp src/lib/driver/program_cache.cpp src/lib/driver/buffer.cpp src/lib/driver/context.cpp src/lib/driver/dispatch.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/base.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/generation/matrix_product.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/syntax/engine/object.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/api/blas/clBLAS.cpp src/lib/api/blas/cublas.cpp src/lib/runtime/execute.cpp src/lib/runtime/scheduler/dag.cpp src/lib/runtime/scheduler/strategies/heft.cpp src/lib/runtime/inference/predictors/random_forest.cpp src/lib/runtime/inference/profiles.cpp src/lib/runtime/inference/database.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
boostsrc = 'external/boost/libs/'
|
||||
for s in ['numpy','python','smart_ptr','system','thread']:
|
||||
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
||||
|
Reference in New Issue
Block a user