2017-09-01 21:55:03 -04:00
# include "opts.hpp"
# include "isaac/scalar.h"
# include "isaac/api.h"
# include "isaac/driver/cublas.h"
# include "isaac/driver/backend.h"
# include "isaac/driver/context.h"
# include "isaac/driver/stream.h"
# include "isaac/runtime/predict.h"
# include "isaac/templates/gemm.h"
# include "isaac/templates/error.hpp"
# include "isaac/tools/bench.hpp"
namespace sc = isaac ;
namespace drv = sc : : driver ;
using sc : : param_t ;
enum Code {
RESET = 0 ,
BOLD = 1 ,
ITALIC = 3 ,
FG_RED = 31 ,
FG_GREEN = 32 ,
FG_YELLOW = 33 ,
FG_BLUE = 34 ,
FG_MAGENTA = 35 ,
FG_CYAN = 36 ,
FG_LIGHT_GRAY = 37 ,
FG_DARK_GRAY = 90 ,
FG_LIGHT_RED = 91 ,
FG_LIGHT_GREEN = 92 ,
FG_LIGHT_YELLOW = 93 ,
FG_LIGHT_BLUE = 94 ,
FG_LIGHT_MAGENTA = 95 ,
FG_LIGHT_CYAN = 96 ,
FG_WHITE = 97
} ;
class color_stream {
Code code ;
public :
color_stream ( Code pCode ) : code ( pCode ) { }
friend std : : ostream &
operator < < ( std : : ostream & os , const color_stream & mod ) {
return os < < " \033 [ " < < mod . code < < " m " ;
}
} ;
/* Helpers for benchmarking */
typedef std : : tuple < sc : : DType , sc : : IsaacOperation_t , sc : : IsaacOperation_t , sc : : param_t , sc : : param_t , sc : : param_t > gemm_params_t ;
2017-11-09 13:11:41 -05:00
typedef std : : tuple < sc : : DType , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t > conv_params_t ;
typedef std : : tuple < sc : : DType , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t > pool_params_t ;
2017-09-01 21:55:03 -04:00
2018-03-21 19:36:23 -04:00
struct SC17 {
2017-09-01 21:55:03 -04:00
// GEMM
static std : : vector < gemm_params_t > gemm ( sc : : DType dtype ) {
std : : vector < gemm_params_t > shapes ;
// LinPack
for ( param_t N : std : : vector < param_t > { 512 , 1024 , 2048 } )
shapes . push_back ( std : : make_tuple ( dtype , sc : : ISAAC_OP_N , sc : : ISAAC_OP_T , N , N , N ) ) ;
// DeepBench
for ( sc : : IsaacOperation_t AT : std : : vector < sc : : IsaacOperation_t > { sc : : ISAAC_OP_N , sc : : ISAAC_OP_T } )
for ( param_t M : std : : vector < param_t > { 1760 } )
for ( param_t N : std : : vector < param_t > { 16 , 32 , 64 , 128 } )
shapes . push_back ( std : : make_tuple ( dtype , AT , sc : : ISAAC_OP_N , M , N , M ) ) ;
// PCA/ICA
for ( param_t N : std : : vector < param_t > { 16 , 64 , 256 } )
for ( param_t K : std : : vector < param_t > { 64000 } )
shapes . push_back ( std : : make_tuple ( dtype , sc : : ISAAC_OP_N , sc : : ISAAC_OP_T , N , N , K ) ) ;
// LaPACK
for ( param_t N : std : : vector < param_t > { 1024 , 2048 , 4096 } )
for ( param_t K : std : : vector < param_t > { 32 } )
shapes . push_back ( std : : make_tuple ( dtype , sc : : ISAAC_OP_N , sc : : ISAAC_OP_T , N , N , K ) ) ;
return shapes ;
}
// CONV
static std : : vector < conv_params_t > conv ( sc : : DType dtype ) {
2017-11-09 13:11:41 -05:00
// Vector of (dtype, D, W, H, C, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w)
std : : vector < conv_params_t > shapes ;
2018-03-21 19:36:23 -04:00
// // DeepSpeech
// for(size_t N: std::vector<size_t>{8})
// shapes.push_back(std::make_tuple(dtype, 1, 700, 161, 1, N, 32, 1, 5, 20, 0, 0, 0, 1, 1, 1));
// for(size_t N: std::vector<size_t>{8})
// shapes.push_back(std::make_tuple(dtype, 1, 341, 79, 32, N, 32, 1, 5, 10, 0, 0, 0, 1, 1, 1));
// // OCR
// shapes.push_back(std::make_tuple(dtype, 1, 480, 48, 1, 16, 16, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 240, 24, 16, 16, 32, 1, 3, 3, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 120, 12, 32, 16, 64, 1, 3, 3, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 60, 6, 64, 16, 128, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// // Face Recognition
// shapes.push_back(std::make_tuple(dtype, 1, 108, 108, 3, 8, 64, 1, 3, 3, 0, 1, 1, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 54, 54, 64, 8, 64, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 27, 27, 128, 8, 128, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 128, 8, 256, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 7, 7, 256, 8, 512, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// // Vision
// for(size_t N: std::vector<size_t>{8}){
// shapes.push_back(std::make_tuple(dtype, 1, 224, 224, 3, N, 64, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 112, 112, 64, N, 128, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 56, 56, 128, N, 256, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 256, N, 512, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 512, N, 512, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 7, 7, 512, N, 512, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// }
// shapes.push_back(std::make_tuple(dtype, 1, 224, 224, 3, 16, 64, 1, 7, 7, 0, 3, 3, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 192, 16, 32, 1, 5, 5, 0, 2, 2, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 192, 16, 64, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 512, 16, 48, 1, 5, 5, 0, 2, 2, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 512, 16, 192, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 7, 7, 832, 16, 256, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 7, 7, 832, 16, 128, 1, 5, 5, 0, 2, 2, 1, 1, 1));
// // Speaker ID
// shapes.push_back(std::make_tuple(dtype, 1, 350, 80, 64, 16, 128, 1, 5, 5, 0, 1, 1, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 175, 40, 128, 16, 256, 1, 5, 5, 0, 1, 1, 1, 2, 2));
// // ResNET
// for(size_t N: std::vector<size_t>{8}){
// shapes.push_back(std::make_tuple(dtype, 1, 112, 112, 64, N, 64, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 56, 56, 64, N, 256, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 56, 56, 256, N, 64, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 56, 56, 256, N, 128, 1, 1, 1, 0, 0, 0, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 128, N, 512, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 512, N, 128, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 512, N, 256, 1, 1, 1, 0, 0, 0, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 256, N, 1024, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 512, N, 1024, 1, 1, 1, 0, 0, 0, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 1024, N, 2048, 1, 1, 1, 0, 0, 0, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 7, 7, 512, N, 512, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 7, 7, 512, N, 2048, 1, 1, 1, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 1024, N, 2048, 1, 1, 1, 0, 1, 1, 1, 2, 2));
// }
2017-09-01 21:55:03 -04:00
2017-11-09 13:11:41 -05:00
// 3D-Unet
shapes . push_back ( std : : make_tuple ( dtype , 31 , 204 , 204 , 4 , 1 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 29 , 202 , 202 , 24 , 1 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 27 , 100 , 100 , 24 , 1 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 25 , 98 , 98 , 72 , 1 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 23 , 48 , 48 , 72 , 1 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 21 , 46 , 46 , 216 , 1 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 19 , 22 , 22 , 216 , 1 , 648 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 17 , 20 , 20 , 648 , 1 , 648 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 15 , 36 , 36 , 648 , 1 , 432 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 13 , 36 , 36 , 432 , 1 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 11 , 34 , 34 , 216 , 1 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 11 , 64 , 64 , 216 , 1 , 144 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 11 , 64 , 64 , 144 , 1 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 9 , 62 , 62 , 72 , 1 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 7 , 120 , 120 , 72 , 1 , 48 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 5 , 120 , 120 , 48 , 1 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 3 , 118 , 118 , 24 , 1 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
2018-03-21 19:36:23 -04:00
shapes . push_back ( std : : make_tuple ( dtype , 1 , 116 , 116 , 4 , 1 , 24 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
2017-11-09 13:11:41 -05:00
return shapes ;
}
// POOL
static std : : vector < pool_params_t > pool ( sc : : DType dtype ) {
std : : vector < pool_params_t > shapes ;
// 3D-Unet
2018-03-21 19:36:23 -04:00
shapes . push_back ( std : : make_tuple ( dtype , 31 , 204 , 204 , 1 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 29 , 202 , 202 , 1 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 27 , 100 , 100 , 1 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 25 , 98 , 98 , 1 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 23 , 48 , 48 , 1 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 21 , 46 , 46 , 1 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 19 , 22 , 22 , 1 , 648 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 17 , 20 , 20 , 1 , 648 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 15 , 36 , 36 , 1 , 432 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 13 , 36 , 36 , 1 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 11 , 34 , 34 , 1 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 11 , 64 , 64 , 1 , 144 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 11 , 64 , 64 , 1 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 9 , 62 , 62 , 1 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 7 , 120 , 120 , 1 , 48 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 5 , 120 , 120 , 1 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 3 , 118 , 118 , 1 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 116 , 116 , 1 , 24 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
2017-09-01 21:55:03 -04:00
return shapes ;
}
2017-11-09 13:11:41 -05:00
2017-09-01 21:55:03 -04:00
} ;
/* Metrics for benchmarking */
struct Metric {
virtual std : : function < bool ( double , double ) > cmp ( ) const = 0 ;
2017-11-09 13:11:41 -05:00
virtual double conv ( param_t P , param_t Q , param_t M , param_t K , param_t N , param_t C , param_t R , param_t S , param_t T , double tsec ) const = 0 ;
2017-09-01 21:55:03 -04:00
virtual double gemm ( param_t M , param_t N , param_t K , double tsec ) const = 0 ;
2017-11-09 13:11:41 -05:00
virtual double pool ( param_t P , param_t Q , param_t M , param_t K , param_t N , param_t , param_t , param_t , double tsec ) const = 0 ;
2017-09-01 21:55:03 -04:00
} ;
class FLOPS : public Metric {
public :
FLOPS ( double scale ) : scale_ ( scale ) { }
std : : function < bool ( double , double ) > cmp ( ) const { return std : : greater < double > ( ) ; }
2017-11-09 13:11:41 -05:00
double conv ( param_t P , param_t Q , param_t M , param_t K , param_t N , param_t C , param_t R , param_t S , param_t T , double tsec ) const
{ return sc : : templates : : Conv : : tflops ( P , Q , M , K , N , C , R , S , T , tsec ) * 1e12 / scale_ ; }
2017-09-01 21:55:03 -04:00
double gemm ( param_t M , param_t N , param_t K , double tsec ) const
{ return sc : : templates : : GEMM : : tflops ( M , N , K , tsec ) * 1e12 / scale_ ; }
2017-11-09 13:11:41 -05:00
double pool ( param_t P , param_t Q , param_t M , param_t K , param_t N , param_t T , param_t R , param_t S , double tsec ) const
{ return sc : : templates : : Pool : : tflops ( P , Q , M , K , N , T , R , S , tsec ) * 1e12 / scale_ ; }
2017-09-01 21:55:03 -04:00
private :
double scale_ ;
} ;
class Time : public Metric {
public :
Time ( double scale ) : scale_ ( scale ) { }
std : : function < bool ( double , double ) > cmp ( ) const { return std : : less < double > ( ) ; }
2017-11-09 13:11:41 -05:00
double conv ( param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , double tsec ) const { return tsec * 1e-9 / scale_ ; }
2017-09-01 21:55:03 -04:00
double gemm ( param_t , param_t , param_t , double tsec ) const { return tsec * 1e-9 / scale_ ; }
2017-11-09 13:11:41 -05:00
double pool ( param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , double tsec ) const { return tsec * 1e-9 / scale_ ; }
2017-09-01 21:55:03 -04:00
private :
double scale_ ;
} ;
void print_results_header ( std : : vector < std : : string > sections ) {
std : : cout < < color_stream ( ITALIC ) < < color_stream ( BOLD ) ;
std : : copy ( sections . begin ( ) , sections . end ( ) , std : : ostream_iterator < std : : string > ( std : : cout , " \t " ) ) ;
std : : cout < < color_stream ( RESET ) < < std : : endl ;
}
void print_results ( std : : vector < double > const & times , std : : vector < std : : string > const & prefix , std : : function < bool ( double , double ) > cmp , std : : function < double ( double ) > fn ) {
std : : copy ( prefix . begin ( ) , prefix . end ( ) , std : : ostream_iterator < std : : string > ( std : : cout , " \t " ) ) ;
std : : vector < double > perf ;
std : : transform ( times . begin ( ) , times . end ( ) , std : : back_inserter ( perf ) , fn ) ;
auto fastest = perf ;
std : : sort ( fastest . begin ( ) , fastest . end ( ) , cmp ) ;
for ( auto x : perf ) {
if ( x = = fastest [ 0 ] & & x / fastest [ 1 ] > 1.05 )
std : : cout < < color_stream ( FG_LIGHT_BLUE ) < < x < < color_stream ( RESET ) ;
else
std : : cout < < x ;
std : : cout < < " \t " ;
}
std : : cout < < std : : endl ;
}
void benchmark_gemm ( Metric const & metric , sc : : driver : : Context & ctx , sc : : driver : : Device & device , sc : : driver : : Stream & stream ,
sc : : DType dtype , sc : : IsaacOperation_t AT , sc : : IsaacOperation_t BT , size_t M , size_t N , size_t K ,
sc : : templates : : Generator * generator ) {
size_t ldc = M ;
size_t lda = ( AT = = sc : : ISAAC_OP_N ) ? M : K ;
size_t ldb = ( BT = = sc : : ISAAC_OP_N ) ? K : N ;
size_t dtsize = sc : : size_of ( dtype ) ;
sc : : scalar alpha ( 1. , dtype ) ;
sc : : scalar beta ( 0. , dtype ) ;
char cuAT = ( AT = = sc : : ISAAC_OP_T ) ? ' T ' : ' N ' ;
char cuBT = ( BT = = sc : : ISAAC_OP_T ) ? ' T ' : ' N ' ;
sc : : driver : : Buffer C ( ctx , M * N * dtsize ) ;
sc : : driver : : Buffer A ( ctx , M * K * dtsize ) ;
sc : : driver : : Buffer B ( ctx , K * N * dtsize ) ;
std : : vector < double > times ;
2018-03-21 19:36:23 -04:00
times . push_back ( bench ( [ & ] ( ) { sc : : GEMM ( device , stream , dtype , dtype , AT , BT , M , N , K , 0 , lda , 0 , ldb , 0 , ldc , alpha , A , B , beta , C , 1. , 1. , 1. , NULL , ( sc : : templates : : GEMM * ) generator ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , device ) ) ;
2017-09-01 21:55:03 -04:00
if ( sc : : driver : : dispatch : : cublasinit ( ) ) {
2017-11-09 13:11:41 -05:00
cublasGemmAlgo_t fastest ;
sc : : driver : : cublasGemm ( dtype , stream , cuAT , cuBT , M , N , K , alpha , A , lda , B , ldb , beta , C , ldc , & fastest ) ;
times . push_back ( bench ( [ & ] ( ) { sc : : driver : : cublasGemm ( dtype , stream , cuAT , cuBT , M , N , K , alpha , A , lda , B , ldb , beta , C , ldc , NULL , fastest ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , device ) ) ;
//times.push_back(bench([&](){ sc::driver::cublasGemm(dtype, stream, cuAT, cuBT, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); }, [&](){ stream.synchronize(); }, device));
2017-09-01 21:55:03 -04:00
}
print_results ( times , { str ( AT ) , str ( BT ) , str ( M ) , str ( N ) , str ( K ) } , metric . cmp ( ) , [ & ] ( double tsec ) { return metric . gemm ( M , N , K , tsec ) ; } ) ;
}
void benchmark_conv ( Metric const & metric , sc : : driver : : Context & ctx , sc : : driver : : Device & device , sc : : driver : : Stream & stream ,
2018-03-21 19:36:23 -04:00
sc : : DType in_dtype , sc : : DType out_dtype , size_t D , size_t H , size_t W , size_t C , size_t N , size_t K , size_t T , size_t R , size_t S ,
size_t pad_d , size_t pad_h , size_t pad_w ,
size_t stride_d , size_t stride_h , size_t stride_w ,
size_t upsample_d , size_t upsample_h , size_t upsample_w ,
2017-09-01 21:55:03 -04:00
sc : : templates : : Generator * generator ) {
2017-11-09 13:11:41 -05:00
2018-03-21 19:36:23 -04:00
param_t Zk = 0 , crop_z_m0 = 0 , crop_z_m1 = 0 , crop_z_p0 = 0 , crop_z_p1 = 0 , crop_z_q0 = 0 , crop_z_q1 = 0 ;
2017-11-09 13:11:41 -05:00
param_t M , P , Q ;
2018-03-21 19:36:23 -04:00
sc : : templates : : Conv : : output_shapes ( D , H , W , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , upsample_d , upsample_h , upsample_w , M , P , Q ) ;
sc : : ActivationType activation = sc : : Linear ;
2017-09-01 21:55:03 -04:00
2018-03-21 19:36:23 -04:00
size_t vect_c = ( in_dtype = = sc : : INT8X4_TYPE ) ? 4 : 1 ;
size_t vect_k = ( out_dtype = = sc : : INT8X4_TYPE ) ? 4 : 1 ;
2017-09-01 21:55:03 -04:00
2018-03-21 19:36:23 -04:00
sc : : DType ab_dtype = ( out_dtype = = sc : : INT8X4_TYPE ) ? sc : : FLOAT_TYPE : out_dtype ;
sc : : scalar alpha ( 1. , ab_dtype ) ;
sc : : scalar beta ( 0. , ab_dtype ) ;
sc : : driver : : Buffer O ( ctx , N * K / vect_k * M * P * Q * sc : : size_of ( out_dtype ) ) ;
sc : : driver : : Buffer I ( ctx , C / vect_c * D * H * W * N * sc : : size_of ( in_dtype ) ) ;
sc : : driver : : Buffer F ( ctx , K * C / vect_c * T * R * S * sc : : size_of ( in_dtype ) ) ;
2017-09-01 21:55:03 -04:00
std : : vector < double > times ;
2018-03-21 19:36:23 -04:00
times . push_back ( bench ( [ & ] ( ) { sc : : CONV ( device , stream , in_dtype , out_dtype , N , K , M , P , Q , C , T , R , S , D , H , W , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , upsample_d , upsample_h , upsample_w , I , F , & O , 1 , NULL , activation , 0. , 1. , 1. , { 1. } , 1. , sc : : NoResidual , Zk , crop_z_m0 , crop_z_m1 , crop_z_p0 , crop_z_p1 , crop_z_q0 , crop_z_q1 , NULL , ( sc : : templates : : Conv * ) generator ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , device ) ) ;
// if(sc::driver::dispatch::cudnninit())
// times.push_back(bench([&](){ sc::driver::cudnnConv(out_dtype, stream, D, H, W, N, K, M, P, Q, C, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, alpha, I, F, beta, O); }, [&](){ stream.synchronize(); }, device));
2017-11-09 13:11:41 -05:00
print_results ( times , { str ( N ) , str ( K ) , str ( M ) , str ( P ) , str ( Q ) , str ( C ) , str ( T ) , str ( R ) , str ( S ) } , metric . cmp ( ) , [ & ] ( double tsec ) { return metric . conv ( M , P , Q , K , N , C , T , R , S , tsec ) ; } ) ;
2017-09-01 21:55:03 -04:00
}
2017-11-09 13:11:41 -05:00
void benchmark_pool ( Metric const & metric , sc : : driver : : Context & ctx , sc : : driver : : Device & device , sc : : driver : : Stream & stream ,
sc : : DType dtype , size_t D , size_t H , size_t W , size_t N , size_t K , size_t T , size_t R , size_t S , size_t pad_d , size_t pad_h , size_t pad_w , size_t stride_d , size_t stride_h , size_t stride_w ,
sc : : templates : : Generator * generator ) {
param_t M , P , Q ;
2018-03-21 19:36:23 -04:00
sc : : templates : : Conv : : output_shapes ( D , H , W , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , 1 , 1 , 1 , M , P , Q ) ;
2017-11-09 13:11:41 -05:00
size_t dtsize = sc : : size_of ( dtype ) ;
sc : : scalar alpha ( 1. , dtype ) ;
sc : : scalar beta ( 0. , dtype ) ;
sc : : driver : : Buffer O ( ctx , N * K * M * P * Q * dtsize ) ;
sc : : driver : : Buffer I ( ctx , K * D * H * W * N * dtsize ) ;
std : : vector < double > times ;
2018-03-21 19:36:23 -04:00
times . push_back ( bench ( [ & ] ( ) { sc : : POOL ( device , stream , dtype , dtype , sc : : MaxPool , K , M , P , Q , N , T , R , S , D , H , W , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , I , O , 1. , 1. , ( sc : : templates : : Pool * ) generator ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , device ) ) ;
2017-11-09 13:11:41 -05:00
if ( sc : : driver : : dispatch : : cudnninit ( ) )
times . push_back ( bench ( [ & ] ( ) { sc : : driver : : cudnnPool ( dtype , stream , D , H , W , N , K , M , P , Q , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , alpha , I , beta , O ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , device ) ) ;
print_results ( times , { str ( N ) , str ( K ) , str ( M ) , str ( P ) , str ( Q ) , str ( T ) , str ( R ) , str ( S ) } , metric . cmp ( ) , [ & ] ( double tsec ) { return metric . pool ( M , P , Q , K , N , T , R , S , tsec ) ; } ) ;
}
2017-09-01 21:55:03 -04:00
/* ------------------------------- */
void loop_nest ( std : : vector < size_t > const & ranges , std : : function < void ( std : : vector < size_t > const & ) > const & f ) {
size_t D = ranges . size ( ) ;
std : : vector < size_t > values ( D , 0 ) ;
// Start with innermost loop
size_t i = D - 1 ;
while ( true ) {
//Execute function
f ( values ) ;
//Increment counters
while ( values [ i ] + + = = ranges [ i ] - 1 ) {
if ( i = = 0 )
return ;
values [ i - - ] = 0 ;
}
i = D - 1 ;
}
}
template < class T >
void loop_nest ( std : : vector < std : : vector < T > > const & iterates , std : : function < void ( std : : vector < T > ) > const & f ) {
//Ranges to iterate over
std : : vector < size_t > ranges ;
for ( auto const & x : iterates )
ranges . push_back ( x . size ( ) ) ;
//Proxy function
auto proxy = [ & ] ( std : : vector < size_t > const & idx ) {
std : : vector < T > x ( iterates . size ( ) ) ;
for ( size_t i = 0 ; i < x . size ( ) ; + + i )
x [ i ] = iterates [ i ] [ idx [ i ] ] ;
f ( x ) ;
} ;
//Iterate
loop_nest ( ranges , proxy ) ;
}
2018-03-21 19:36:23 -04:00
void search_conv ( int32_t D , int32_t H , int32_t W ,
int32_t C , int32_t N , int32_t K ,
int32_t T , int32_t R , int32_t S ,
int32_t pad_d , int32_t pad_h , int32_t pad_w ,
int32_t stride_d , int32_t stride_h , int32_t stride_w ,
int32_t upsample_d , int32_t upsample_h , int32_t upsample_w ,
sc : : ActivationType activation , sc : : DType in_dtype , sc : : DType out_dtype )
{
2017-09-01 21:55:03 -04:00
auto ctx = drv : : backend : : contexts : : get_default ( ) ;
2017-11-09 13:11:41 -05:00
size_t P = ( H - R + 1 + 2 * pad_h + stride_h - 1 ) / stride_h ;
size_t Q = ( W - S + 1 + 2 * pad_w + stride_w - 1 ) / stride_w ;
size_t M = ( D - T + 1 + 2 * pad_d + stride_d - 1 ) / stride_d ;
2017-09-01 21:55:03 -04:00
2018-03-21 19:36:23 -04:00
size_t Zk = 0 , crop_z_m0 = 0 , crop_z_m1 = 0 , crop_z_p0 = 0 , crop_z_p1 = 0 , crop_z_q0 = 0 , crop_z_q1 = 0 ;
2017-09-01 21:55:03 -04:00
//Setup
2018-03-21 19:36:23 -04:00
drv : : Buffer O ( ctx , K * P * Q * M * N * sc : : size_of ( out_dtype ) ) ;
drv : : Buffer I ( ctx , C * H * W * D * N * sc : : size_of ( in_dtype ) ) ;
drv : : Buffer F ( ctx , C * R * S * T * K * sc : : size_of ( in_dtype ) ) ;
2017-09-01 21:55:03 -04:00
drv : : Stream stream ( ctx ) ;
//Exhaustive search
std : : vector < sc : : param_t > r1 = { 1 } ;
2017-11-09 13:11:41 -05:00
std : : vector < sc : : param_t > rv = { 4 } ;
std : : vector < sc : : param_t > rr = { 1 , 2 , 4 , 8 } ;
std : : vector < sc : : param_t > rl = { 4 , 8 , 16 , 32 } ;
std : : vector < sc : : param_t > rs = { 4 , 8 , 16 } ;
2017-09-01 21:55:03 -04:00
double best ;
2017-11-09 13:11:41 -05:00
loop_nest < sc : : param_t > ( { rv , rl , rl , rs , rs , rl , rl , r1 , rr , rr } , [ & ] ( std : : vector < sc : : param_t > const & x ) {
2018-03-21 19:36:23 -04:00
sc : : templates : : Conv generator ( in_dtype , out_dtype ,
C , D , H , W , N , K , M , P , Q , T , R , S ,
pad_d , pad_h , pad_w ,
stride_d , stride_h , stride_w ,
upsample_d , upsample_h , upsample_w ,
activation , 1 ,
sc : : NoResidual , Zk , crop_z_m0 , crop_z_m1 , crop_z_p0 , crop_z_p1 , crop_z_q0 , crop_z_q1 ,
x [ 0 ] , x [ 1 ] , x [ 2 ] , x [ 3 ] , x [ 4 ] , x [ 5 ] , x [ 6 ] , x [ 7 ] , x [ 8 ] ) ;
2017-09-01 21:55:03 -04:00
//Compile
try {
std : : string src = generator . dump ( ctx . device ( ) , " conv " ) ;
drv : : Module program ( ctx , src ) ;
drv : : Kernel kernel ( program , " conv " ) ;
2018-03-21 19:36:23 -04:00
double tsec = bench ( [ & ] ( ) { generator . enqueue ( kernel , stream , I , F , & O ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , ctx . device ( ) ) ;
2017-11-09 13:11:41 -05:00
double tflops = sc : : templates : : Conv : : tflops ( P , Q , M , K , N , C , R , S , T , tsec ) ;
2017-09-01 21:55:03 -04:00
best = std : : max ( tflops , best ) ;
std : : cout < < " // " ;
std : : copy ( x . begin ( ) , x . end ( ) , std : : ostream_iterator < int > ( std : : cout , " " ) ) ;
std : : cout < < " : " < < tflops < < " TFLOPS [BEST: " < < best < < " ] " < < std : : endl ;
} catch ( isaac : : templates : : invalid_parameters const & ) {
return ;
} catch ( drv : : exception : : cuda : : launch_out_of_resources const & ) {
return ;
}
} ) ;
std : : cout < < " ISAAC: " < < best < < std : : endl ;
}
void search_gemm ( int32_t M , int32_t N , int32_t K , sc : : IsaacOperation_t AT , sc : : IsaacOperation_t BT , sc : : DType dtype ) {
auto ctx = drv : : backend : : contexts : : get_default ( ) ;
size_t dtsize = sc : : size_of ( dtype ) ;
// Setup
size_t ldc = M ;
size_t lda = ( AT = = sc : : ISAAC_OP_N ) ? M : K ;
size_t ldb = ( BT = = sc : : ISAAC_OP_N ) ? K : N ;
int32_t offc = 0 , offa = 0 , offb = 0 ;
drv : : Buffer C ( ctx , M * N * dtsize ) ;
drv : : Buffer A ( ctx , M * K * dtsize ) ;
drv : : Buffer B ( ctx , K * N * dtsize ) ;
drv : : Stream stream ( ctx ) ;
sc : : scalar alpha ( 1. , dtype ) , beta ( 0. , dtype ) ;
// Exhaustive search
std : : vector < sc : : param_t > r1 = { 1 } ;
std : : vector < sc : : param_t > rv = { 4 } ;
std : : vector < sc : : param_t > rr = { 1 , 2 , 4 , 8 } ;
std : : vector < sc : : param_t > rl = { 4 , 8 , 16 , 32 } ;
std : : vector < sc : : param_t > rs = { 4 , 8 , 16 } ;
double best = 0 ;
loop_nest < sc : : param_t > ( { rv , rl , rl , rl , rs , r1 , rs , rl , rl , rl , rl , r1 , rr , rr } , [ & ] ( std : : vector < sc : : param_t > const & x ) {
2018-03-21 19:36:23 -04:00
isaac : : templates : : GEMM generator ( dtype , dtype , AT , BT , M , N , K , offa , lda , offb , ldb , offc , ldc , x [ 0 ] , x [ 1 ] , x [ 2 ] , x [ 3 ] , x [ 4 ] , x [ 5 ] , x [ 6 ] , x [ 7 ] , x [ 8 ] , x [ 9 ] , x [ 10 ] , x [ 11 ] , x [ 12 ] , x [ 13 ] ) ;
2017-09-01 21:55:03 -04:00
// Compile
try {
std : : string src = generator . dump ( ctx . device ( ) , " gemm " ) ;
drv : : Module program ( ctx , src ) ;
drv : : Kernel kernel ( program , " gemm " ) ;
double time = bench ( [ & ] ( ) { generator . enqueue ( kernel , stream , alpha , A , B , beta , C ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , ctx . device ( ) ) ;
double tflops = 2 * 1e-3 * M * N * K / time ;
best = std : : max ( tflops , best ) ;
std : : cout < < " // " ;
std : : copy ( x . begin ( ) , x . end ( ) , std : : ostream_iterator < int > ( std : : cout , " " ) ) ;
std : : cout < < " : " < < tflops < < " TFLOPS [BEST: " < < best < < " ] " < < std : : endl ;
} catch ( isaac : : templates : : invalid_parameters const & ) {
return ;
} catch ( drv : : exception : : cuda : : launch_out_of_resources const & ) {
return ;
}
} ) ;
std : : cout < < " ISAAC: " < < best < < std : : endl ;
}
/* Helpers for dumping source code */
void dump_source ( sc : : driver : : Device const & device , sc : : templates : : Generator & generator , opts : : Options * options , std : : string const & name ) {
if ( options - > get < std : : string > ( " format " ) = = " ptx " )
std : : cout < < generator . dump ( device , name ) < < std : : endl ;
else {
auto x = generator . tuning_params ( ) ;
std : : cout < < " Tuning parameters: " < < std : : flush ;
for ( size_t i = 0 ; i < x . size ( ) ; + + i )
std : : cout < < ( ( i > 0 ) ? " , " : " " ) < < x [ i ] < < std : : flush ;
std : : cout < < std : : endl ;
}
}
/* Application code */
int main ( int argc , char * argv [ ] ) {
opts : : Application program ( " isaac-tools " , " Command-line interface for ISAAC " ) ;
// Options
opts : : Options * options = program . options ( ) ;
options - > add < size_t > ( " device " , " Device to run on " , 0 ) ;
2018-03-21 19:36:23 -04:00
options - > add < sc : : DType > ( " dtype " , " Data-type to use for computations " , " float32 " , { { " int8x4 " , sc : : INT8X4_TYPE } , { " float32 " , sc : : FLOAT_TYPE } , { " float64 " , sc : : DOUBLE_TYPE } } ) ;
2017-09-01 21:55:03 -04:00
options - > add < std : : string > ( " name " , " Name to give to the generated kernel " , " kernel " ) ;
options - > add_group ( " search " , " Exhaustively search for best tuning parameters " ) ;
opts : : Options * dump = options - > add_group ( " dump " , " Dump source-code generated by ISAAC " ) ;
dump - > add ( " format " , " Format to generate " , " ptx " , { " ptx " , " params " } ) ;
dump - > add ( " target " , " Target GPU (sm_xx) " , { " sm_50 " , " sm_52 " , " sm_60 " , " sm_61 " , " sm_70 " } ) ;
opts : : Options * bench = options - > add_group ( " bench " , " Benchmark source code generated by ISAAC " ) ;
bench - > add ( " suite " , " Benchmarking suite to run " , " custom " , { " custom " , " deepbench " } ) ;
bench - > add < std : : shared_ptr < Metric > > ( " metric " , " performance metric for the results " , " tflops " , { { " tflops " , std : : make_shared < FLOPS > ( 1e12 ) } , { " ms " , std : : make_shared < Time > ( 1e-3 ) } , { " us " , std : : make_shared < Time > ( 1e-6 ) } } ) ;
// Constraints
options - > add_constraint ( opts : : OneOf ( { " bench " , " dump " , " search " } ) ) ;
2017-11-09 13:11:41 -05:00
options - > add_constraint ( opts : : OneOf ( { " gemm " , " conv " , " pool " } ) ) ;
// GEMM
opts : : Options * gemm = options - > add_group ( " gemm " , " Use matrix-multiplication " ) ;
gemm - > add ( " layout " , " Transposition layout for A and B " , " NT " , { " NN " , " NT " , " TN " , " TT " } ) ;
gemm - > add < std : : vector < size_t > > ( " shape " , " Matrix shapes (M,N,K) " , { 2048 , 2048 , 2048 } , opts : : SizeConstraint ( 3 ) ) ;
gemm - > add < std : : vector < size_t > > ( " kernel " , " Bypass predictive model to use given tuning parameters " , opts : : SizeConstraint ( 14 ) ) ;
// CONV
opts : : Options * conv = options - > add_group ( " conv " , " Use convolutions " ) ;
conv - > add < std : : vector < size_t > > ( " shape " , " Tensor shapes (D, H, W, C, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w) " , { 1 , 70 , 14 , 512 , 128 , 64 , 1 , 7 , 7 , 0 , 0 , 0 , 1 , 1 , 1 } , opts : : SizeConstraint ( 15 ) ) ;
conv - > add < std : : vector < size_t > > ( " kernel " , " Bypass predictive model to use given tuning parameters " , opts : : SizeConstraint ( 9 ) ) ;
// POOL
opts : : Options * pool = options - > add_group ( " pool " , " Use pooling " ) ;
pool - > add < std : : vector < size_t > > ( " shape " , " Tensor shapes (D, H, W, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w) " , { 1 , 70 , 14 , 128 , 64 , 1 , 7 , 7 , 0 , 0 , 0 , 1 , 1 , 1 } , opts : : SizeConstraint ( 14 ) ) ;
pool - > add < std : : vector < size_t > > ( " kernel " , " Bypass predictive model to use given tuning parameters " , opts : : SizeConstraint ( 4 ) ) ;
2017-09-01 21:55:03 -04:00
program . parse ( argc , argv ) ;
if ( options - > has ( " bench " ) )
std : : cout < < std : : fixed < < std : : setprecision ( 2 ) ;
//Device
sc : : driver : : Device device = sc : : driver : : backend : : devices ( ) [ options - > get < size_t > ( " device " ) ] ;
if ( options - > has ( " dump " ) & & dump - > has ( " target " ) ) {
std : : string target = dump - > get < std : : string > ( " target " ) ;
char major = target [ 3 ] ;
char minor = target [ 4 ] ;
device . interpret_as ( std : : make_pair ( ( size_t ) std : : atoi ( & major ) , ( size_t ) std : : atoi ( & minor ) ) ) ;
}
static sc : : driver : : Context context ( device ) ;
sc : : driver : : Stream stream ( context ) ;
// Data-Type
sc : : DType dtype = options - > get < sc : : DType > ( " dtype " ) ;
// Kernel name
std : : string name = options - > get < std : : string > ( " name " ) ;
/* Get optimized kernel generator */
std : : unique_ptr < sc : : templates : : Generator > generator ;
// GEMM
if ( options - > has ( " gemm " ) ) {
std : : string layout = gemm - > get < std : : string > ( " layout " ) ;
sc : : IsaacOperation_t AT = layout [ 0 ] = = ' T ' ? sc : : ISAAC_OP_T : sc : : ISAAC_OP_N ;
sc : : IsaacOperation_t BT = layout [ 1 ] = = ' T ' ? sc : : ISAAC_OP_T : sc : : ISAAC_OP_N ;
auto shape = gemm - > get < std : : vector < size_t > > ( " shape " ) ;
size_t M = shape [ 0 ] , N = shape [ 1 ] , K = shape [ 2 ] ;
//Get Source
size_t ldc = M ;
size_t lda = ( AT = = sc : : ISAAC_OP_N ) ? M : K ;
size_t ldb = ( BT = = sc : : ISAAC_OP_N ) ? K : N ;
2017-11-09 13:11:41 -05:00
if ( options - > has ( " search " ) ) {
search_gemm ( M , N , K , AT , BT , dtype ) ;
}
2017-09-01 21:55:03 -04:00
if ( gemm - > has ( " kernel " ) ) {
auto x = gemm - > get < std : : vector < size_t > > ( " kernel " ) ;
2018-03-21 19:36:23 -04:00
generator . reset ( new sc : : templates : : GEMM ( dtype , dtype , AT , BT , M , N , K , 0 , lda , 0 , ldb , 0 , ldc , x [ 0 ] , x [ 1 ] , x [ 2 ] , x [ 3 ] , x [ 4 ] , x [ 5 ] , x [ 6 ] , x [ 7 ] , x [ 8 ] , x [ 9 ] , x [ 10 ] , x [ 11 ] , x [ 12 ] , x [ 13 ] ) ) ;
2017-09-01 21:55:03 -04:00
}
else {
sc : : runtime : : GEMMProfile * profile = ( sc : : runtime : : GEMMProfile * ) sc : : runtime : : database . at ( { device . architecture ( ) , sc : : runtime : : GEMM } ) . get ( ) ;
2018-03-21 19:36:23 -04:00
generator . reset ( new sc : : templates : : GEMM ( profile - > predict ( stream , dtype , dtype , AT , BT , M , N , K , 0 , lda , 0 , ldb , 0 , ldc ) ) ) ;
2017-09-01 21:55:03 -04:00
}
if ( options - > has ( " dump " ) )
dump_source ( device , * generator , dump , name ) ;
if ( options - > has ( " bench " ) ) {
auto metric = bench - > get < std : : shared_ptr < Metric > > ( " metric " ) ;
print_results_header ( { " AT " , " BT " , " M " , " N " , " K " , " ISAAC " , " cuBLAS " } ) ;
std : : vector < gemm_params_t > shapes ;
//User provided shapes
if ( bench - > get < std : : string > ( " suite " ) = = " custom " )
shapes = { std : : make_tuple ( dtype , AT , BT , M , N , K ) } ;
//SC17 paper shapes
if ( bench - > get < std : : string > ( " suite " ) = = " deepbench " )
2018-03-21 19:36:23 -04:00
shapes = SC17 : : gemm ( dtype ) ;
2017-09-01 21:55:03 -04:00
//Print results
for ( auto x : shapes ) {
std : : tie ( dtype , AT , BT , M , N , K ) = x ;
benchmark_gemm ( * metric , context , device , stream , dtype , AT , BT , M , N , K , gemm - > has ( " kernel " ) ? generator . get ( ) : NULL ) ;
}
}
}
// CONV
if ( options - > has ( " conv " ) ) {
2018-03-21 19:36:23 -04:00
sc : : DType in_dtype = dtype ;
sc : : DType out_dtype = dtype ;
2017-09-01 21:55:03 -04:00
auto x = conv - > get < std : : vector < size_t > > ( " shape " ) ;
2017-11-09 13:11:41 -05:00
param_t D = x [ 0 ] , H = x [ 1 ] , W = x [ 2 ] , C = x [ 3 ] , N = x [ 4 ] , K = x [ 5 ] , T = x [ 6 ] , R = x [ 7 ] , S = x [ 8 ] , pad_d = x [ 9 ] , pad_h = x [ 10 ] , pad_w = x [ 11 ] , stride_d = x [ 12 ] , stride_h = x [ 13 ] , stride_w = x [ 14 ] ;
param_t M , P , Q ;
2018-03-21 19:36:23 -04:00
param_t upsample_d = 1 , upsample_h = 1 , upsample_w = 1 ;
param_t Zk = 0 , crop_z_m0 = 0 , crop_z_m1 = 0 , crop_z_p0 = 0 , crop_z_p1 = 0 , crop_z_q0 = 0 , crop_z_q1 = 0 ;
sc : : templates : : Conv : : output_shapes ( D , H , W , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , upsample_d , upsample_h , upsample_w , M , P , Q ) ;
sc : : ActivationType activation = sc : : Linear ;
2017-11-09 13:11:41 -05:00
if ( options - > has ( " search " ) )
2018-03-21 19:36:23 -04:00
search_conv ( D , H , W , C , N , K , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , upsample_d , upsample_h , upsample_w , activation , in_dtype , out_dtype ) ;
2017-09-01 21:55:03 -04:00
if ( conv - > has ( " kernel " ) ) {
auto x = conv - > get < std : : vector < size_t > > ( " kernel " ) ;
2018-03-21 19:36:23 -04:00
generator . reset ( new sc : : templates : : Conv ( in_dtype , out_dtype , C , D , H , W , N , K , M , P , Q , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , upsample_d , upsample_h , upsample_w , activation , 1 , sc : : NoResidual , Zk , crop_z_m0 , crop_z_m1 , crop_z_p0 , crop_z_p1 , crop_z_q0 , crop_z_q1 , x [ 0 ] , x [ 1 ] , x [ 2 ] , x [ 3 ] , x [ 4 ] , x [ 5 ] , x [ 6 ] , x [ 7 ] , x [ 8 ] ) ) ;
2017-09-01 21:55:03 -04:00
}
else {
sc : : runtime : : ConvProfile * profile = ( sc : : runtime : : ConvProfile * ) sc : : runtime : : database . at ( { device . architecture ( ) , sc : : runtime : : CONV } ) . get ( ) ;
2018-03-21 19:36:23 -04:00
generator . reset ( new sc : : templates : : Conv ( profile - > predict ( stream , in_dtype , out_dtype , C , D , H , W , N , K , M , P , Q , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , upsample_d , upsample_h , upsample_w , activation , 1 , sc : : NoResidual , Zk , crop_z_m0 , crop_z_m1 , crop_z_p0 , crop_z_p1 , crop_z_q0 , crop_z_q1 ) ) ) ;
2017-09-01 21:55:03 -04:00
}
if ( options - > has ( " dump " ) )
dump_source ( device , * generator , dump , name ) ;
if ( options - > has ( " bench " ) ) {
auto metric = bench - > get < std : : shared_ptr < Metric > > ( " metric " ) ;
2017-11-09 13:11:41 -05:00
print_results_header ( { " N " , " K " , " M " , " P " , " Q " , " C " , " T " , " R " , " S " , " ISAAC " , " cuDNN " } ) ;
2017-09-01 21:55:03 -04:00
std : : vector < conv_params_t > shapes ;
//User provided shapes
if ( bench - > get < std : : string > ( " suite " ) = = " custom " )
2017-11-09 13:11:41 -05:00
shapes = { std : : make_tuple ( dtype , D , W , H , C , N , K , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w ) } ;
2017-09-01 21:55:03 -04:00
//SuperComputing17 shapes
if ( bench - > get < std : : string > ( " suite " ) = = " deepbench " )
2018-03-21 19:36:23 -04:00
shapes = SC17 : : conv ( dtype ) ;
2017-09-01 21:55:03 -04:00
//Print results
for ( auto x : shapes ) {
2017-11-09 13:11:41 -05:00
std : : tie ( dtype , D , W , H , C , N , K , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w ) = x ;
2018-03-21 19:36:23 -04:00
benchmark_conv ( * metric , context , device , stream , in_dtype , out_dtype , D , H , W , C , N , K , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , upsample_d , upsample_h , upsample_w , conv - > has ( " kernel " ) ? generator . get ( ) : NULL ) ;
2017-11-09 13:11:41 -05:00
}
}
}
// POOL
if ( options - > has ( " pool " ) ) {
auto x = pool - > get < std : : vector < size_t > > ( " shape " ) ;
param_t D = x [ 0 ] , W = x [ 1 ] , H = x [ 2 ] , N = x [ 3 ] , K = x [ 4 ] , T = x [ 5 ] , R = x [ 6 ] , S = x [ 7 ] , pad_d = x [ 8 ] , pad_h = x [ 9 ] , pad_w = x [ 10 ] , stride_d = x [ 11 ] , stride_h = x [ 12 ] , stride_w = x [ 13 ] ;
param_t M , P , Q ;
2018-03-21 19:36:23 -04:00
sc : : templates : : Conv : : output_shapes ( D , H , W , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , 1 , 1 , 1 , M , P , Q ) ;
2017-11-09 13:11:41 -05:00
if ( pool - > has ( " kernel " ) ) {
auto x = pool - > get < std : : vector < size_t > > ( " kernel " ) ;
2018-03-21 19:36:23 -04:00
generator . reset ( new sc : : templates : : Pool ( dtype , dtype , sc : : MaxPool , K , D , H , W , N , M , P , Q , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , x [ 0 ] , x [ 1 ] , x [ 2 ] , x [ 3 ] ) ) ;
2017-11-09 13:11:41 -05:00
}
else {
2018-03-21 19:36:23 -04:00
generator . reset ( new sc : : templates : : Pool ( dtype , dtype , sc : : MaxPool , K , D , H , W , N , M , P , Q , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w ) ) ;
2017-11-09 13:11:41 -05:00
}
if ( options - > has ( " dump " ) )
dump_source ( device , * generator , dump , name ) ;
if ( options - > has ( " bench " ) ) {
auto metric = bench - > get < std : : shared_ptr < Metric > > ( " metric " ) ;
print_results_header ( { " N " , " K " , " M " , " P " , " Q " , " T " , " R " , " S " , " ISAAC " , " cuDNN " } ) ;
std : : vector < pool_params_t > shapes ;
//User provided shapes
if ( bench - > get < std : : string > ( " suite " ) = = " custom " )
shapes = { std : : make_tuple ( dtype , D , W , H , N , K , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w ) } ;
//SuperComputing17 shapes
if ( bench - > get < std : : string > ( " suite " ) = = " deepbench " )
2018-03-21 19:36:23 -04:00
shapes = SC17 : : pool ( dtype ) ;
2017-11-09 13:11:41 -05:00
//Print results
for ( auto x : shapes ) {
std : : tie ( dtype , D , W , H , N , K , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w ) = x ;
benchmark_pool ( * metric , context , device , stream , dtype , D , H , W , N , K , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , pool - > has ( " kernel " ) ? generator . get ( ) : NULL ) ;
2017-09-01 21:55:03 -04:00
}
}
}
}