2017-09-01 21:55:03 -04:00
# include "opts.hpp"
# include "isaac/scalar.h"
# include "isaac/api.h"
# include "isaac/driver/cublas.h"
# include "isaac/driver/backend.h"
# include "isaac/driver/context.h"
# include "isaac/driver/stream.h"
# include "isaac/runtime/predict.h"
# include "isaac/templates/gemm.h"
# include "isaac/templates/error.hpp"
# include "isaac/tools/bench.hpp"
namespace sc = isaac ;
namespace drv = sc : : driver ;
using sc : : param_t ;
enum Code {
RESET = 0 ,
BOLD = 1 ,
ITALIC = 3 ,
FG_RED = 31 ,
FG_GREEN = 32 ,
FG_YELLOW = 33 ,
FG_BLUE = 34 ,
FG_MAGENTA = 35 ,
FG_CYAN = 36 ,
FG_LIGHT_GRAY = 37 ,
FG_DARK_GRAY = 90 ,
FG_LIGHT_RED = 91 ,
FG_LIGHT_GREEN = 92 ,
FG_LIGHT_YELLOW = 93 ,
FG_LIGHT_BLUE = 94 ,
FG_LIGHT_MAGENTA = 95 ,
FG_LIGHT_CYAN = 96 ,
FG_WHITE = 97
} ;
class color_stream {
Code code ;
public :
color_stream ( Code pCode ) : code ( pCode ) { }
friend std : : ostream &
operator < < ( std : : ostream & os , const color_stream & mod ) {
return os < < " \033 [ " < < mod . code < < " m " ;
}
} ;
/* Helpers for benchmarking */
typedef std : : tuple < sc : : DType , sc : : IsaacOperation_t , sc : : IsaacOperation_t , sc : : param_t , sc : : param_t , sc : : param_t > gemm_params_t ;
2017-11-09 13:11:41 -05:00
typedef std : : tuple < sc : : DType , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t > conv_params_t ;
typedef std : : tuple < sc : : DType , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t > pool_params_t ;
2017-09-01 21:55:03 -04:00
struct TestBench {
// GEMM
static std : : vector < gemm_params_t > gemm ( sc : : DType dtype ) {
std : : vector < gemm_params_t > shapes ;
// LinPack
for ( param_t N : std : : vector < param_t > { 512 , 1024 , 2048 } )
shapes . push_back ( std : : make_tuple ( dtype , sc : : ISAAC_OP_N , sc : : ISAAC_OP_T , N , N , N ) ) ;
// DeepBench
for ( sc : : IsaacOperation_t AT : std : : vector < sc : : IsaacOperation_t > { sc : : ISAAC_OP_N , sc : : ISAAC_OP_T } )
for ( param_t M : std : : vector < param_t > { 1760 } )
for ( param_t N : std : : vector < param_t > { 16 , 32 , 64 , 128 } )
shapes . push_back ( std : : make_tuple ( dtype , AT , sc : : ISAAC_OP_N , M , N , M ) ) ;
// PCA/ICA
for ( param_t N : std : : vector < param_t > { 16 , 64 , 256 } )
for ( param_t K : std : : vector < param_t > { 64000 } )
shapes . push_back ( std : : make_tuple ( dtype , sc : : ISAAC_OP_N , sc : : ISAAC_OP_T , N , N , K ) ) ;
// LaPACK
for ( param_t N : std : : vector < param_t > { 1024 , 2048 , 4096 } )
for ( param_t K : std : : vector < param_t > { 32 } )
shapes . push_back ( std : : make_tuple ( dtype , sc : : ISAAC_OP_N , sc : : ISAAC_OP_T , N , N , K ) ) ;
return shapes ;
}
// CONV
static std : : vector < conv_params_t > conv ( sc : : DType dtype ) {
2017-11-09 13:11:41 -05:00
// Vector of (dtype, D, W, H, C, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w)
2017-09-01 21:55:03 -04:00
2017-11-09 13:11:41 -05:00
std : : vector < conv_params_t > shapes ;
// DeepSpeech
for ( size_t N : std : : vector < size_t > { 8 } )
shapes . push_back ( std : : make_tuple ( dtype , 1 , 700 , 161 , 1 , N , 32 , 1 , 5 , 20 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
for ( size_t N : std : : vector < size_t > { 8 } )
shapes . push_back ( std : : make_tuple ( dtype , 1 , 341 , 79 , 32 , N , 32 , 1 , 5 , 10 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
// OCR
shapes . push_back ( std : : make_tuple ( dtype , 1 , 480 , 48 , 1 , 16 , 16 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 240 , 24 , 16 , 16 , 32 , 1 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 120 , 12 , 32 , 16 , 64 , 1 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 60 , 6 , 64 , 16 , 128 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
// Face Recognition
shapes . push_back ( std : : make_tuple ( dtype , 1 , 108 , 108 , 3 , 8 , 64 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 2 , 2 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 54 , 54 , 64 , 8 , 64 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 27 , 27 , 128 , 8 , 128 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 14 , 14 , 128 , 8 , 256 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 7 , 7 , 256 , 8 , 512 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
// Vision
2017-09-01 21:55:03 -04:00
for ( size_t N : std : : vector < size_t > { 8 } ) {
2017-11-09 13:11:41 -05:00
shapes . push_back ( std : : make_tuple ( dtype , 1 , 224 , 224 , 3 , N , 64 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 112 , 112 , 64 , N , 128 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 56 , 56 , 128 , N , 256 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 28 , 28 , 256 , N , 512 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 14 , 14 , 512 , N , 512 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 7 , 7 , 512 , N , 512 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
}
shapes . push_back ( std : : make_tuple ( dtype , 1 , 224 , 224 , 3 , 16 , 64 , 1 , 7 , 7 , 0 , 3 , 3 , 1 , 2 , 2 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 28 , 28 , 192 , 16 , 32 , 1 , 5 , 5 , 0 , 2 , 2 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 28 , 28 , 192 , 16 , 64 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 14 , 14 , 512 , 16 , 48 , 1 , 5 , 5 , 0 , 2 , 2 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 14 , 14 , 512 , 16 , 192 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 7 , 7 , 832 , 16 , 256 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 7 , 7 , 832 , 16 , 128 , 1 , 5 , 5 , 0 , 2 , 2 , 1 , 1 , 1 ) ) ;
// Speaker ID
shapes . push_back ( std : : make_tuple ( dtype , 1 , 350 , 80 , 64 , 16 , 128 , 1 , 5 , 5 , 0 , 1 , 1 , 1 , 2 , 2 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 175 , 40 , 128 , 16 , 256 , 1 , 5 , 5 , 0 , 1 , 1 , 1 , 2 , 2 ) ) ;
// ResNET
for ( size_t N : std : : vector < size_t > { 8 } ) {
shapes . push_back ( std : : make_tuple ( dtype , 1 , 112 , 112 , 64 , N , 64 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 56 , 56 , 64 , N , 256 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 56 , 56 , 256 , N , 64 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 56 , 56 , 256 , N , 128 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 2 , 2 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 28 , 28 , 128 , N , 512 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 28 , 28 , 512 , N , 128 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 28 , 28 , 512 , N , 256 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 2 , 2 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 14 , 14 , 256 , N , 1024 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 28 , 28 , 512 , N , 1024 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 2 , 2 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 14 , 14 , 1024 , N , 2048 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 2 , 2 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 7 , 7 , 512 , N , 512 , 1 , 3 , 3 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 7 , 7 , 512 , N , 2048 , 1 , 1 , 1 , 0 , 1 , 1 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 14 , 14 , 1024 , N , 2048 , 1 , 1 , 1 , 0 , 1 , 1 , 1 , 2 , 2 ) ) ;
2017-09-01 21:55:03 -04:00
}
2017-11-09 13:11:41 -05:00
// 3D-Unet
shapes . push_back ( std : : make_tuple ( dtype , 31 , 204 , 204 , 4 , 1 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 29 , 202 , 202 , 24 , 1 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 27 , 100 , 100 , 24 , 1 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 25 , 98 , 98 , 72 , 1 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 23 , 48 , 48 , 72 , 1 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 21 , 46 , 46 , 216 , 1 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 19 , 22 , 22 , 216 , 1 , 648 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 17 , 20 , 20 , 648 , 1 , 648 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 15 , 36 , 36 , 648 , 1 , 432 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 13 , 36 , 36 , 432 , 1 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 11 , 34 , 34 , 216 , 1 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 11 , 64 , 64 , 216 , 1 , 144 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 11 , 64 , 64 , 144 , 1 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 9 , 62 , 62 , 72 , 1 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 7 , 120 , 120 , 72 , 1 , 48 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 5 , 120 , 120 , 48 , 1 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 3 , 118 , 118 , 24 , 1 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 116 , 116 , 3 , 1 , 24 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
2017-09-01 21:55:03 -04:00
2017-11-09 13:11:41 -05:00
return shapes ;
}
// POOL
static std : : vector < pool_params_t > pool ( sc : : DType dtype ) {
std : : vector < pool_params_t > shapes ;
// 3D-Unet
shapes . push_back ( std : : make_tuple ( dtype , 31 , 204 , 204 , 4 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 29 , 202 , 202 , 4 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 27 , 100 , 100 , 4 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 25 , 98 , 98 , 4 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 23 , 48 , 48 , 4 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 21 , 46 , 46 , 4 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 19 , 22 , 22 , 4 , 648 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 17 , 20 , 20 , 4 , 648 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 15 , 36 , 36 , 4 , 432 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 13 , 36 , 36 , 4 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 11 , 34 , 34 , 4 , 216 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 11 , 64 , 64 , 4 , 144 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 11 , 64 , 64 , 4 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 9 , 62 , 62 , 4 , 72 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 7 , 120 , 120 , 4 , 48 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 5 , 120 , 120 , 4 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 3 , 118 , 118 , 4 , 24 , 3 , 3 , 3 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
shapes . push_back ( std : : make_tuple ( dtype , 1 , 116 , 116 , 4 , 24 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 ) ) ;
2017-09-01 21:55:03 -04:00
return shapes ;
}
2017-11-09 13:11:41 -05:00
2017-09-01 21:55:03 -04:00
} ;
/* Metrics for benchmarking */
struct Metric {
virtual std : : function < bool ( double , double ) > cmp ( ) const = 0 ;
2017-11-09 13:11:41 -05:00
virtual double conv ( param_t P , param_t Q , param_t M , param_t K , param_t N , param_t C , param_t R , param_t S , param_t T , double tsec ) const = 0 ;
2017-09-01 21:55:03 -04:00
virtual double gemm ( param_t M , param_t N , param_t K , double tsec ) const = 0 ;
2017-11-09 13:11:41 -05:00
virtual double pool ( param_t P , param_t Q , param_t M , param_t K , param_t N , param_t , param_t , param_t , double tsec ) const = 0 ;
2017-09-01 21:55:03 -04:00
} ;
class FLOPS : public Metric {
public :
FLOPS ( double scale ) : scale_ ( scale ) { }
std : : function < bool ( double , double ) > cmp ( ) const { return std : : greater < double > ( ) ; }
2017-11-09 13:11:41 -05:00
double conv ( param_t P , param_t Q , param_t M , param_t K , param_t N , param_t C , param_t R , param_t S , param_t T , double tsec ) const
{ return sc : : templates : : Conv : : tflops ( P , Q , M , K , N , C , R , S , T , tsec ) * 1e12 / scale_ ; }
2017-09-01 21:55:03 -04:00
double gemm ( param_t M , param_t N , param_t K , double tsec ) const
{ return sc : : templates : : GEMM : : tflops ( M , N , K , tsec ) * 1e12 / scale_ ; }
2017-11-09 13:11:41 -05:00
double pool ( param_t P , param_t Q , param_t M , param_t K , param_t N , param_t T , param_t R , param_t S , double tsec ) const
{ return sc : : templates : : Pool : : tflops ( P , Q , M , K , N , T , R , S , tsec ) * 1e12 / scale_ ; }
2017-09-01 21:55:03 -04:00
private :
double scale_ ;
} ;
class Time : public Metric {
public :
Time ( double scale ) : scale_ ( scale ) { }
std : : function < bool ( double , double ) > cmp ( ) const { return std : : less < double > ( ) ; }
2017-11-09 13:11:41 -05:00
double conv ( param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , double tsec ) const { return tsec * 1e-9 / scale_ ; }
2017-09-01 21:55:03 -04:00
double gemm ( param_t , param_t , param_t , double tsec ) const { return tsec * 1e-9 / scale_ ; }
2017-11-09 13:11:41 -05:00
double pool ( param_t , param_t , param_t , param_t , param_t , param_t , param_t , param_t , double tsec ) const { return tsec * 1e-9 / scale_ ; }
2017-09-01 21:55:03 -04:00
private :
double scale_ ;
} ;
void print_results_header ( std : : vector < std : : string > sections ) {
std : : cout < < color_stream ( ITALIC ) < < color_stream ( BOLD ) ;
std : : copy ( sections . begin ( ) , sections . end ( ) , std : : ostream_iterator < std : : string > ( std : : cout , " \t " ) ) ;
std : : cout < < color_stream ( RESET ) < < std : : endl ;
}
void print_results ( std : : vector < double > const & times , std : : vector < std : : string > const & prefix , std : : function < bool ( double , double ) > cmp , std : : function < double ( double ) > fn ) {
std : : copy ( prefix . begin ( ) , prefix . end ( ) , std : : ostream_iterator < std : : string > ( std : : cout , " \t " ) ) ;
std : : vector < double > perf ;
std : : transform ( times . begin ( ) , times . end ( ) , std : : back_inserter ( perf ) , fn ) ;
auto fastest = perf ;
std : : sort ( fastest . begin ( ) , fastest . end ( ) , cmp ) ;
for ( auto x : perf ) {
if ( x = = fastest [ 0 ] & & x / fastest [ 1 ] > 1.05 )
std : : cout < < color_stream ( FG_LIGHT_BLUE ) < < x < < color_stream ( RESET ) ;
else
std : : cout < < x ;
std : : cout < < " \t " ;
}
std : : cout < < std : : endl ;
}
void benchmark_gemm ( Metric const & metric , sc : : driver : : Context & ctx , sc : : driver : : Device & device , sc : : driver : : Stream & stream ,
sc : : DType dtype , sc : : IsaacOperation_t AT , sc : : IsaacOperation_t BT , size_t M , size_t N , size_t K ,
sc : : templates : : Generator * generator ) {
size_t ldc = M ;
size_t lda = ( AT = = sc : : ISAAC_OP_N ) ? M : K ;
size_t ldb = ( BT = = sc : : ISAAC_OP_N ) ? K : N ;
size_t dtsize = sc : : size_of ( dtype ) ;
sc : : scalar alpha ( 1. , dtype ) ;
sc : : scalar beta ( 0. , dtype ) ;
char cuAT = ( AT = = sc : : ISAAC_OP_T ) ? ' T ' : ' N ' ;
char cuBT = ( BT = = sc : : ISAAC_OP_T ) ? ' T ' : ' N ' ;
sc : : driver : : Buffer C ( ctx , M * N * dtsize ) ;
sc : : driver : : Buffer A ( ctx , M * K * dtsize ) ;
sc : : driver : : Buffer B ( ctx , K * N * dtsize ) ;
std : : vector < double > times ;
times . push_back ( bench ( [ & ] ( ) { sc : : GEMM ( device , stream , dtype , AT , BT , M , N , K , 0 , lda , 0 , ldb , 0 , ldc , alpha , A , B , beta , C , ( sc : : templates : : GEMM * ) generator ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , device ) ) ;
if ( sc : : driver : : dispatch : : cublasinit ( ) ) {
2017-11-09 13:11:41 -05:00
cublasGemmAlgo_t fastest ;
sc : : driver : : cublasGemm ( dtype , stream , cuAT , cuBT , M , N , K , alpha , A , lda , B , ldb , beta , C , ldc , & fastest ) ;
times . push_back ( bench ( [ & ] ( ) { sc : : driver : : cublasGemm ( dtype , stream , cuAT , cuBT , M , N , K , alpha , A , lda , B , ldb , beta , C , ldc , NULL , fastest ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , device ) ) ;
//times.push_back(bench([&](){ sc::driver::cublasGemm(dtype, stream, cuAT, cuBT, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); }, [&](){ stream.synchronize(); }, device));
2017-09-01 21:55:03 -04:00
}
print_results ( times , { str ( AT ) , str ( BT ) , str ( M ) , str ( N ) , str ( K ) } , metric . cmp ( ) , [ & ] ( double tsec ) { return metric . gemm ( M , N , K , tsec ) ; } ) ;
}
void benchmark_conv ( Metric const & metric , sc : : driver : : Context & ctx , sc : : driver : : Device & device , sc : : driver : : Stream & stream ,
2017-11-09 13:11:41 -05:00
sc : : DType dtype , size_t D , size_t H , size_t W , size_t C , size_t N , size_t K , size_t T , size_t R , size_t S , size_t pad_d , size_t pad_h , size_t pad_w , size_t stride_d , size_t stride_h , size_t stride_w ,
2017-09-01 21:55:03 -04:00
sc : : templates : : Generator * generator ) {
2017-11-09 13:11:41 -05:00
param_t M , P , Q ;
sc : : templates : : Conv : : output_shapes ( D , H , W , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , M , P , Q ) ;
2017-09-01 21:55:03 -04:00
size_t dtsize = sc : : size_of ( dtype ) ;
sc : : scalar alpha ( 1. , dtype ) ;
sc : : scalar beta ( 0. , dtype ) ;
2017-11-09 13:11:41 -05:00
sc : : driver : : Buffer O ( ctx , N * K * M * P * Q * dtsize ) ;
sc : : driver : : Buffer I ( ctx , C * D * H * W * N * dtsize ) ;
sc : : driver : : Buffer F ( ctx , K * C * T * R * S * dtsize ) ;
2017-09-01 21:55:03 -04:00
std : : vector < double > times ;
2017-11-09 13:11:41 -05:00
times . push_back ( bench ( [ & ] ( ) { sc : : CONV ( device , stream , dtype , N , K , M , P , Q , C , T , R , S , D , H , W , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , alpha , I , F , beta , O , ( sc : : templates : : Conv * ) generator ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , device ) ) ;
2017-09-01 21:55:03 -04:00
if ( sc : : driver : : dispatch : : cudnninit ( ) )
2017-11-09 13:11:41 -05:00
times . push_back ( bench ( [ & ] ( ) { sc : : driver : : cudnnConv ( dtype , stream , D , H , W , N , K , M , P , Q , C , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , alpha , I , F , beta , O ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , device ) ) ;
print_results ( times , { str ( N ) , str ( K ) , str ( M ) , str ( P ) , str ( Q ) , str ( C ) , str ( T ) , str ( R ) , str ( S ) } , metric . cmp ( ) , [ & ] ( double tsec ) { return metric . conv ( M , P , Q , K , N , C , T , R , S , tsec ) ; } ) ;
2017-09-01 21:55:03 -04:00
}
2017-11-09 13:11:41 -05:00
void benchmark_pool ( Metric const & metric , sc : : driver : : Context & ctx , sc : : driver : : Device & device , sc : : driver : : Stream & stream ,
sc : : DType dtype , size_t D , size_t H , size_t W , size_t N , size_t K , size_t T , size_t R , size_t S , size_t pad_d , size_t pad_h , size_t pad_w , size_t stride_d , size_t stride_h , size_t stride_w ,
sc : : templates : : Generator * generator ) {
param_t M , P , Q ;
sc : : templates : : Conv : : output_shapes ( D , H , W , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , M , P , Q ) ;
size_t dtsize = sc : : size_of ( dtype ) ;
sc : : scalar alpha ( 1. , dtype ) ;
sc : : scalar beta ( 0. , dtype ) ;
sc : : driver : : Buffer O ( ctx , N * K * M * P * Q * dtsize ) ;
sc : : driver : : Buffer I ( ctx , K * D * H * W * N * dtsize ) ;
std : : vector < double > times ;
times . push_back ( bench ( [ & ] ( ) { sc : : POOL ( device , stream , dtype , K , M , P , Q , N , T , R , S , D , H , W , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , I , O , ( sc : : templates : : Pool * ) generator ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , device ) ) ;
if ( sc : : driver : : dispatch : : cudnninit ( ) )
times . push_back ( bench ( [ & ] ( ) { sc : : driver : : cudnnPool ( dtype , stream , D , H , W , N , K , M , P , Q , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , alpha , I , beta , O ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , device ) ) ;
print_results ( times , { str ( N ) , str ( K ) , str ( M ) , str ( P ) , str ( Q ) , str ( T ) , str ( R ) , str ( S ) } , metric . cmp ( ) , [ & ] ( double tsec ) { return metric . pool ( M , P , Q , K , N , T , R , S , tsec ) ; } ) ;
}
2017-09-01 21:55:03 -04:00
/* ------------------------------- */
void loop_nest ( std : : vector < size_t > const & ranges , std : : function < void ( std : : vector < size_t > const & ) > const & f ) {
size_t D = ranges . size ( ) ;
std : : vector < size_t > values ( D , 0 ) ;
// Start with innermost loop
size_t i = D - 1 ;
while ( true ) {
//Execute function
f ( values ) ;
//Increment counters
while ( values [ i ] + + = = ranges [ i ] - 1 ) {
if ( i = = 0 )
return ;
values [ i - - ] = 0 ;
}
i = D - 1 ;
}
}
template < class T >
void loop_nest ( std : : vector < std : : vector < T > > const & iterates , std : : function < void ( std : : vector < T > ) > const & f ) {
//Ranges to iterate over
std : : vector < size_t > ranges ;
for ( auto const & x : iterates )
ranges . push_back ( x . size ( ) ) ;
//Proxy function
auto proxy = [ & ] ( std : : vector < size_t > const & idx ) {
std : : vector < T > x ( iterates . size ( ) ) ;
for ( size_t i = 0 ; i < x . size ( ) ; + + i )
x [ i ] = iterates [ i ] [ idx [ i ] ] ;
f ( x ) ;
} ;
//Iterate
loop_nest ( ranges , proxy ) ;
}
void search_conv ( int32_t W , int32_t H , int32_t C , int32_t N , int32_t K , int32_t R , int32_t S , int32_t pad_h , int32_t pad_w , int32_t stride_h , int32_t stride_w , sc : : DType dtype ) {
auto ctx = drv : : backend : : contexts : : get_default ( ) ;
size_t dtsize = sc : : size_of ( dtype ) ;
2017-11-09 13:11:41 -05:00
size_t D = 1 , T = 1 , stride_d = 1 , pad_d = 0 ;
size_t P = ( H - R + 1 + 2 * pad_h + stride_h - 1 ) / stride_h ;
size_t Q = ( W - S + 1 + 2 * pad_w + stride_w - 1 ) / stride_w ;
size_t M = ( D - T + 1 + 2 * pad_d + stride_d - 1 ) / stride_d ;
2017-09-01 21:55:03 -04:00
//Setup
2017-11-09 13:11:41 -05:00
drv : : Buffer O ( ctx , K * P * Q * M * N * dtsize ) ;
drv : : Buffer I ( ctx , C * H * W * D * N * dtsize ) ;
drv : : Buffer F ( ctx , C * R * S * T * K * dtsize ) ;
2017-09-01 21:55:03 -04:00
drv : : Stream stream ( ctx ) ;
sc : : scalar alpha ( 1. , dtype ) , beta ( 1. , dtype ) ;
//Exhaustive search
std : : vector < sc : : param_t > r1 = { 1 } ;
2017-11-09 13:11:41 -05:00
std : : vector < sc : : param_t > rv = { 4 } ;
std : : vector < sc : : param_t > rr = { 1 , 2 , 4 , 8 } ;
std : : vector < sc : : param_t > rl = { 4 , 8 , 16 , 32 } ;
std : : vector < sc : : param_t > rs = { 4 , 8 , 16 } ;
2017-09-01 21:55:03 -04:00
double best ;
2017-11-09 13:11:41 -05:00
loop_nest < sc : : param_t > ( { rv , rl , rl , rs , rs , rl , rl , r1 , rr , rr } , [ & ] ( std : : vector < sc : : param_t > const & x ) {
sc : : templates : : Conv generator ( dtype , C , D , H , W , N , K , M , P , Q , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , x [ 0 ] , x [ 1 ] , x [ 2 ] , x [ 3 ] , x [ 4 ] , x [ 5 ] , x [ 6 ] , x [ 7 ] , x [ 8 ] ) ;
2017-09-01 21:55:03 -04:00
//Compile
try {
std : : string src = generator . dump ( ctx . device ( ) , " conv " ) ;
drv : : Module program ( ctx , src ) ;
drv : : Kernel kernel ( program , " conv " ) ;
double tsec = bench ( [ & ] ( ) { generator . enqueue ( kernel , stream , alpha , I , F , beta , O ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , ctx . device ( ) ) ;
2017-11-09 13:11:41 -05:00
double tflops = sc : : templates : : Conv : : tflops ( P , Q , M , K , N , C , R , S , T , tsec ) ;
2017-09-01 21:55:03 -04:00
best = std : : max ( tflops , best ) ;
std : : cout < < " // " ;
std : : copy ( x . begin ( ) , x . end ( ) , std : : ostream_iterator < int > ( std : : cout , " " ) ) ;
std : : cout < < " : " < < tflops < < " TFLOPS [BEST: " < < best < < " ] " < < std : : endl ;
} catch ( isaac : : templates : : invalid_parameters const & ) {
return ;
} catch ( drv : : exception : : cuda : : launch_out_of_resources const & ) {
return ;
}
} ) ;
std : : cout < < " ISAAC: " < < best < < std : : endl ;
}
void search_gemm ( int32_t M , int32_t N , int32_t K , sc : : IsaacOperation_t AT , sc : : IsaacOperation_t BT , sc : : DType dtype ) {
auto ctx = drv : : backend : : contexts : : get_default ( ) ;
size_t dtsize = sc : : size_of ( dtype ) ;
// Setup
size_t ldc = M ;
size_t lda = ( AT = = sc : : ISAAC_OP_N ) ? M : K ;
size_t ldb = ( BT = = sc : : ISAAC_OP_N ) ? K : N ;
int32_t offc = 0 , offa = 0 , offb = 0 ;
drv : : Buffer C ( ctx , M * N * dtsize ) ;
drv : : Buffer A ( ctx , M * K * dtsize ) ;
drv : : Buffer B ( ctx , K * N * dtsize ) ;
drv : : Stream stream ( ctx ) ;
sc : : scalar alpha ( 1. , dtype ) , beta ( 0. , dtype ) ;
// Exhaustive search
std : : vector < sc : : param_t > r1 = { 1 } ;
std : : vector < sc : : param_t > rv = { 4 } ;
std : : vector < sc : : param_t > rr = { 1 , 2 , 4 , 8 } ;
std : : vector < sc : : param_t > rl = { 4 , 8 , 16 , 32 } ;
std : : vector < sc : : param_t > rs = { 4 , 8 , 16 } ;
double best = 0 ;
loop_nest < sc : : param_t > ( { rv , rl , rl , rl , rs , r1 , rs , rl , rl , rl , rl , r1 , rr , rr } , [ & ] ( std : : vector < sc : : param_t > const & x ) {
isaac : : templates : : GEMM generator ( dtype , AT , BT , M , N , K , offa , lda , offb , ldb , offc , ldc , x [ 0 ] , x [ 1 ] , x [ 2 ] , x [ 3 ] , x [ 4 ] , x [ 5 ] , x [ 6 ] , x [ 7 ] , x [ 8 ] , x [ 9 ] , x [ 10 ] , x [ 11 ] , x [ 12 ] , x [ 13 ] ) ;
// Compile
try {
std : : string src = generator . dump ( ctx . device ( ) , " gemm " ) ;
drv : : Module program ( ctx , src ) ;
drv : : Kernel kernel ( program , " gemm " ) ;
double time = bench ( [ & ] ( ) { generator . enqueue ( kernel , stream , alpha , A , B , beta , C ) ; } , [ & ] ( ) { stream . synchronize ( ) ; } , ctx . device ( ) ) ;
double tflops = 2 * 1e-3 * M * N * K / time ;
best = std : : max ( tflops , best ) ;
std : : cout < < " // " ;
std : : copy ( x . begin ( ) , x . end ( ) , std : : ostream_iterator < int > ( std : : cout , " " ) ) ;
std : : cout < < " : " < < tflops < < " TFLOPS [BEST: " < < best < < " ] " < < std : : endl ;
} catch ( isaac : : templates : : invalid_parameters const & ) {
return ;
} catch ( drv : : exception : : cuda : : launch_out_of_resources const & ) {
return ;
}
} ) ;
std : : cout < < " ISAAC: " < < best < < std : : endl ;
}
/* Helpers for dumping source code */
void dump_source ( sc : : driver : : Device const & device , sc : : templates : : Generator & generator , opts : : Options * options , std : : string const & name ) {
if ( options - > get < std : : string > ( " format " ) = = " ptx " )
std : : cout < < generator . dump ( device , name ) < < std : : endl ;
else {
auto x = generator . tuning_params ( ) ;
std : : cout < < " Tuning parameters: " < < std : : flush ;
for ( size_t i = 0 ; i < x . size ( ) ; + + i )
std : : cout < < ( ( i > 0 ) ? " , " : " " ) < < x [ i ] < < std : : flush ;
std : : cout < < std : : endl ;
}
}
/* Application code */
int main ( int argc , char * argv [ ] ) {
opts : : Application program ( " isaac-tools " , " Command-line interface for ISAAC " ) ;
// Options
opts : : Options * options = program . options ( ) ;
options - > add < size_t > ( " device " , " Device to run on " , 0 ) ;
2017-11-09 13:11:41 -05:00
options - > add < sc : : DType > ( " dtype " , " Data-type to use for computations " , " float32 " , { { " float32 " , sc : : FLOAT_TYPE } , { " float64 " , sc : : DOUBLE_TYPE } } ) ;
2017-09-01 21:55:03 -04:00
options - > add < std : : string > ( " name " , " Name to give to the generated kernel " , " kernel " ) ;
options - > add_group ( " search " , " Exhaustively search for best tuning parameters " ) ;
opts : : Options * dump = options - > add_group ( " dump " , " Dump source-code generated by ISAAC " ) ;
dump - > add ( " format " , " Format to generate " , " ptx " , { " ptx " , " params " } ) ;
dump - > add ( " target " , " Target GPU (sm_xx) " , { " sm_50 " , " sm_52 " , " sm_60 " , " sm_61 " , " sm_70 " } ) ;
opts : : Options * bench = options - > add_group ( " bench " , " Benchmark source code generated by ISAAC " ) ;
bench - > add ( " suite " , " Benchmarking suite to run " , " custom " , { " custom " , " deepbench " } ) ;
bench - > add < std : : shared_ptr < Metric > > ( " metric " , " performance metric for the results " , " tflops " , { { " tflops " , std : : make_shared < FLOPS > ( 1e12 ) } , { " ms " , std : : make_shared < Time > ( 1e-3 ) } , { " us " , std : : make_shared < Time > ( 1e-6 ) } } ) ;
// Constraints
options - > add_constraint ( opts : : OneOf ( { " bench " , " dump " , " search " } ) ) ;
2017-11-09 13:11:41 -05:00
options - > add_constraint ( opts : : OneOf ( { " gemm " , " conv " , " pool " } ) ) ;
// GEMM
opts : : Options * gemm = options - > add_group ( " gemm " , " Use matrix-multiplication " ) ;
gemm - > add ( " layout " , " Transposition layout for A and B " , " NT " , { " NN " , " NT " , " TN " , " TT " } ) ;
gemm - > add < std : : vector < size_t > > ( " shape " , " Matrix shapes (M,N,K) " , { 2048 , 2048 , 2048 } , opts : : SizeConstraint ( 3 ) ) ;
gemm - > add < std : : vector < size_t > > ( " kernel " , " Bypass predictive model to use given tuning parameters " , opts : : SizeConstraint ( 14 ) ) ;
// CONV
opts : : Options * conv = options - > add_group ( " conv " , " Use convolutions " ) ;
conv - > add < std : : vector < size_t > > ( " shape " , " Tensor shapes (D, H, W, C, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w) " , { 1 , 70 , 14 , 512 , 128 , 64 , 1 , 7 , 7 , 0 , 0 , 0 , 1 , 1 , 1 } , opts : : SizeConstraint ( 15 ) ) ;
conv - > add < std : : vector < size_t > > ( " kernel " , " Bypass predictive model to use given tuning parameters " , opts : : SizeConstraint ( 9 ) ) ;
// POOL
opts : : Options * pool = options - > add_group ( " pool " , " Use pooling " ) ;
pool - > add < std : : vector < size_t > > ( " shape " , " Tensor shapes (D, H, W, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w) " , { 1 , 70 , 14 , 128 , 64 , 1 , 7 , 7 , 0 , 0 , 0 , 1 , 1 , 1 } , opts : : SizeConstraint ( 14 ) ) ;
pool - > add < std : : vector < size_t > > ( " kernel " , " Bypass predictive model to use given tuning parameters " , opts : : SizeConstraint ( 4 ) ) ;
2017-09-01 21:55:03 -04:00
program . parse ( argc , argv ) ;
if ( options - > has ( " bench " ) )
std : : cout < < std : : fixed < < std : : setprecision ( 2 ) ;
//Device
sc : : driver : : Device device = sc : : driver : : backend : : devices ( ) [ options - > get < size_t > ( " device " ) ] ;
if ( options - > has ( " dump " ) & & dump - > has ( " target " ) ) {
std : : string target = dump - > get < std : : string > ( " target " ) ;
char major = target [ 3 ] ;
char minor = target [ 4 ] ;
device . interpret_as ( std : : make_pair ( ( size_t ) std : : atoi ( & major ) , ( size_t ) std : : atoi ( & minor ) ) ) ;
}
static sc : : driver : : Context context ( device ) ;
sc : : driver : : Stream stream ( context ) ;
// Data-Type
sc : : DType dtype = options - > get < sc : : DType > ( " dtype " ) ;
// Kernel name
std : : string name = options - > get < std : : string > ( " name " ) ;
/* Get optimized kernel generator */
std : : unique_ptr < sc : : templates : : Generator > generator ;
// GEMM
if ( options - > has ( " gemm " ) ) {
std : : string layout = gemm - > get < std : : string > ( " layout " ) ;
sc : : IsaacOperation_t AT = layout [ 0 ] = = ' T ' ? sc : : ISAAC_OP_T : sc : : ISAAC_OP_N ;
sc : : IsaacOperation_t BT = layout [ 1 ] = = ' T ' ? sc : : ISAAC_OP_T : sc : : ISAAC_OP_N ;
auto shape = gemm - > get < std : : vector < size_t > > ( " shape " ) ;
size_t M = shape [ 0 ] , N = shape [ 1 ] , K = shape [ 2 ] ;
//Get Source
size_t ldc = M ;
size_t lda = ( AT = = sc : : ISAAC_OP_N ) ? M : K ;
size_t ldb = ( BT = = sc : : ISAAC_OP_N ) ? K : N ;
2017-11-09 13:11:41 -05:00
if ( options - > has ( " search " ) ) {
search_gemm ( M , N , K , AT , BT , dtype ) ;
}
2017-09-01 21:55:03 -04:00
if ( gemm - > has ( " kernel " ) ) {
auto x = gemm - > get < std : : vector < size_t > > ( " kernel " ) ;
2017-11-09 13:11:41 -05:00
generator . reset ( new sc : : templates : : GEMM ( dtype , AT , BT , M , N , K , 0 , lda , 0 , ldb , 0 , ldc , x [ 0 ] , x [ 1 ] , x [ 2 ] , x [ 3 ] , x [ 4 ] , x [ 5 ] , x [ 6 ] , x [ 7 ] , x [ 8 ] , x [ 9 ] , x [ 10 ] , x [ 11 ] , x [ 12 ] , x [ 13 ] ) ) ;
2017-09-01 21:55:03 -04:00
}
else {
sc : : runtime : : GEMMProfile * profile = ( sc : : runtime : : GEMMProfile * ) sc : : runtime : : database . at ( { device . architecture ( ) , sc : : runtime : : GEMM } ) . get ( ) ;
generator . reset ( new sc : : templates : : GEMM ( profile - > predict ( stream , dtype , AT , BT , M , N , K , 0 , lda , 0 , ldb , 0 , ldc ) ) ) ;
}
if ( options - > has ( " dump " ) )
dump_source ( device , * generator , dump , name ) ;
if ( options - > has ( " bench " ) ) {
auto metric = bench - > get < std : : shared_ptr < Metric > > ( " metric " ) ;
print_results_header ( { " AT " , " BT " , " M " , " N " , " K " , " ISAAC " , " cuBLAS " } ) ;
std : : vector < gemm_params_t > shapes ;
//User provided shapes
if ( bench - > get < std : : string > ( " suite " ) = = " custom " )
shapes = { std : : make_tuple ( dtype , AT , BT , M , N , K ) } ;
//SC17 paper shapes
if ( bench - > get < std : : string > ( " suite " ) = = " deepbench " )
shapes = TestBench : : gemm ( dtype ) ;
//Print results
for ( auto x : shapes ) {
std : : tie ( dtype , AT , BT , M , N , K ) = x ;
benchmark_gemm ( * metric , context , device , stream , dtype , AT , BT , M , N , K , gemm - > has ( " kernel " ) ? generator . get ( ) : NULL ) ;
}
}
}
// CONV
if ( options - > has ( " conv " ) ) {
auto x = conv - > get < std : : vector < size_t > > ( " shape " ) ;
2017-11-09 13:11:41 -05:00
param_t D = x [ 0 ] , H = x [ 1 ] , W = x [ 2 ] , C = x [ 3 ] , N = x [ 4 ] , K = x [ 5 ] , T = x [ 6 ] , R = x [ 7 ] , S = x [ 8 ] , pad_d = x [ 9 ] , pad_h = x [ 10 ] , pad_w = x [ 11 ] , stride_d = x [ 12 ] , stride_h = x [ 13 ] , stride_w = x [ 14 ] ;
param_t M , P , Q ;
sc : : templates : : Conv : : output_shapes ( D , H , W , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , M , P , Q ) ;
if ( options - > has ( " search " ) )
search_conv ( W , H , C , N , K , R , S , pad_h , pad_w , stride_h , stride_w , dtype ) ;
2017-09-01 21:55:03 -04:00
if ( conv - > has ( " kernel " ) ) {
auto x = conv - > get < std : : vector < size_t > > ( " kernel " ) ;
2017-11-09 13:11:41 -05:00
generator . reset ( new sc : : templates : : Conv ( dtype , C , D , H , W , N , K , M , P , Q , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , x [ 0 ] , x [ 1 ] , x [ 2 ] , x [ 3 ] , x [ 4 ] , x [ 5 ] , x [ 6 ] , x [ 7 ] , x [ 8 ] ) ) ;
2017-09-01 21:55:03 -04:00
}
else {
sc : : runtime : : ConvProfile * profile = ( sc : : runtime : : ConvProfile * ) sc : : runtime : : database . at ( { device . architecture ( ) , sc : : runtime : : CONV } ) . get ( ) ;
2017-11-09 13:11:41 -05:00
generator . reset ( new sc : : templates : : Conv ( profile - > predict ( stream , dtype , C , D , H , W , N , K , M , P , Q , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w ) ) ) ;
2017-09-01 21:55:03 -04:00
}
if ( options - > has ( " dump " ) )
dump_source ( device , * generator , dump , name ) ;
if ( options - > has ( " bench " ) ) {
auto metric = bench - > get < std : : shared_ptr < Metric > > ( " metric " ) ;
2017-11-09 13:11:41 -05:00
print_results_header ( { " N " , " K " , " M " , " P " , " Q " , " C " , " T " , " R " , " S " , " ISAAC " , " cuDNN " } ) ;
2017-09-01 21:55:03 -04:00
std : : vector < conv_params_t > shapes ;
//User provided shapes
if ( bench - > get < std : : string > ( " suite " ) = = " custom " )
2017-11-09 13:11:41 -05:00
shapes = { std : : make_tuple ( dtype , D , W , H , C , N , K , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w ) } ;
2017-09-01 21:55:03 -04:00
//SuperComputing17 shapes
if ( bench - > get < std : : string > ( " suite " ) = = " deepbench " )
shapes = TestBench : : conv ( dtype ) ;
//Print results
for ( auto x : shapes ) {
2017-11-09 13:11:41 -05:00
std : : tie ( dtype , D , W , H , C , N , K , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w ) = x ;
benchmark_conv ( * metric , context , device , stream , dtype , D , H , W , C , N , K , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , conv - > has ( " kernel " ) ? generator . get ( ) : NULL ) ;
}
}
}
// POOL
if ( options - > has ( " pool " ) ) {
auto x = pool - > get < std : : vector < size_t > > ( " shape " ) ;
param_t D = x [ 0 ] , W = x [ 1 ] , H = x [ 2 ] , N = x [ 3 ] , K = x [ 4 ] , T = x [ 5 ] , R = x [ 6 ] , S = x [ 7 ] , pad_d = x [ 8 ] , pad_h = x [ 9 ] , pad_w = x [ 10 ] , stride_d = x [ 11 ] , stride_h = x [ 12 ] , stride_w = x [ 13 ] ;
param_t M , P , Q ;
sc : : templates : : Conv : : output_shapes ( D , H , W , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , M , P , Q ) ;
if ( pool - > has ( " kernel " ) ) {
auto x = pool - > get < std : : vector < size_t > > ( " kernel " ) ;
generator . reset ( new sc : : templates : : Pool ( dtype , K , D , H , W , N , M , P , Q , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , x [ 0 ] , x [ 1 ] , x [ 2 ] , x [ 3 ] ) ) ;
}
else {
generator . reset ( new sc : : templates : : Pool ( dtype , K , D , H , W , N , M , P , Q , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w ) ) ;
}
if ( options - > has ( " dump " ) )
dump_source ( device , * generator , dump , name ) ;
if ( options - > has ( " bench " ) ) {
auto metric = bench - > get < std : : shared_ptr < Metric > > ( " metric " ) ;
print_results_header ( { " N " , " K " , " M " , " P " , " Q " , " T " , " R " , " S " , " ISAAC " , " cuDNN " } ) ;
std : : vector < pool_params_t > shapes ;
//User provided shapes
if ( bench - > get < std : : string > ( " suite " ) = = " custom " )
shapes = { std : : make_tuple ( dtype , D , W , H , N , K , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w ) } ;
//SuperComputing17 shapes
if ( bench - > get < std : : string > ( " suite " ) = = " deepbench " )
shapes = TestBench : : pool ( dtype ) ;
//Print results
for ( auto x : shapes ) {
std : : tie ( dtype , D , W , H , N , K , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w ) = x ;
benchmark_pool ( * metric , context , device , stream , dtype , D , H , W , N , K , T , R , S , pad_d , pad_h , pad_w , stride_d , stride_h , stride_w , pool - > has ( " kernel " ) ? generator . get ( ) : NULL ) ;
2017-09-01 21:55:03 -04:00
}
}
}
}