2014-09-02 22:03:20 -04:00
from __future__ import division
import argparse
import itertools
import os
from external . configobj import ConfigObj
import pyopencl as cl
import pyviennacl as vcl
from pyviennacl import backend
from pyviennacl import opencl
from pyviennacl import atidlas
2014-09-27 20:54:17 -04:00
from dataset import generate_dataset
2014-09-28 19:37:56 -04:00
from model import train_model
import tools
2014-09-02 22:03:20 -04:00
import utils
import vclio
import optimize
import sys
# Precision name -> ViennaCL dtype; looked up by do_tuning for every entry of
# the configured precision setting.
DATATYPES = {
    ' single ': vcl.float32,
    ' double ': vcl.float64,
}
2014-09-02 22:03:20 -04:00
2014-09-06 00:39:38 -04:00
# Registry of tunable operations, keyed by operation name.
#
# Each entry provides:
#   ' template '        : the atidlas template class used to build a kernel profile
#   ' parameter-names ' : names of the template's tunable parameters, in order
#   ' perf-index '      : callable turning a measurement into a performance figure
#   ' perf-measure '    : unit label of that figure
#
# do_tuning calls ' perf-index ' with x = [itemsize, sizes, t], i.e. x[0] is the
# element size of the datatype, x[1] the list of problem sizes and x[2] the
# value handed back by the optimizer (presumably an execution time -- confirm
# against optimize.genetic).
TYPES = {
    ' vector-axpy ': {
        ' template ': vcl.atidlas.VectorAxpyTemplate,
        ' parameter-names ': [' simd-width ', ' local-size-0 ', ' num-groups-0 ', ' fetch '],
        ' perf-index ': lambda x: 3 * x[0] * x[1][0] / x[2] * 1e-9,
        ' perf-measure ': ' GB/s ',
    },
    ' matrix-axpy ': {
        ' template ': vcl.atidlas.MatrixAxpyTemplate,
        ' parameter-names ': [' simd-width ', ' local-size-0 ', ' local-size-1 ', ' num-groups-0 ', ' num-groups-1 ', ' fetch '],
        ' perf-index ': lambda x: 3 * x[0] * x[1][0] * x[1][1] / x[2] * 1e-9,
        ' perf-measure ': ' GB/s ',
    },
    ' reduction ': {
        ' template ': vcl.atidlas.ReductionTemplate,
        ' parameter-names ': [' simd-width ', ' local-size-0 ', ' num-groups-0 ', ' fetch '],
        # The parameter names above are one-dimensional (same as vector-axpy),
        # so only sizes[0] is used here; the previous formula also multiplied
        # by sizes[1], which does not exist for a 1-D size list.
        ' perf-index ': lambda x: 2 * x[0] * x[1][0] / x[2] * 1e-9,
        ' perf-measure ': ' GB/s ',
    },
    ' row-wise-reduction ': {
        ' template ': vcl.atidlas.RowWiseReductionTemplate,
        ' parameter-names ': [' simd-width ', ' local-size-0 ', ' local-size-1 ', ' num-groups-0 ', ' fetch '],
        ' perf-index ': lambda x: x[0] * x[1][0] * x[1][1] / x[2] * 1e-9,
        ' perf-measure ': ' GB/s ',
    },
    ' matrix-product ': {
        ' template ': vcl.atidlas.MatrixProductTemplate,
        ' parameter-names ': [' simd-width ', ' local-size-0 ', ' kL ', ' local-size-1 ', ' mS ', ' kS ', ' nS ', ' A-fetch-policy ', ' B-fetch-policy ', ' local-fetch-size-0 ', ' local-fetch-size-1 '],
        # 2*M*N*K operations; independent of the element size x[0].
        ' perf-index ': lambda x: 2 * x[1][0] * x[1][1] * x[1][2] / x[2] * 1e-9,
        ' perf-measure ': ' GFLOP/s ',
    },
}
2014-09-29 03:01:33 +02:00
def do_tuning(config_fname, spec_fname, viennacl_root):
    """Tune every supported operation that has a section in the configuration.

    For each such operation, and for every (datatype, device) pair the section
    requests, a representative ViennaCL statement is built and handed to
    ``optimize.genetic``. Matrix products instead go through
    ``generate_dataset`` / ``train_model`` with a per-layout execution handler.

    Args:
        config_fname: Configuration file, passed to ``ConfigObj``.
        spec_fname: Configuration spec, passed to ``ConfigObj`` as ``configspec``.
        viennacl_root: Accepted for interface compatibility; not used here.
    """
    config = ConfigObj(config_fname, configspec=spec_fname)

    def as_list(convert, value):
        """Apply `convert` to a scalar or to each item of a list; return a list."""
        items = value if isinstance(value, list) else [value]
        return [convert(item) for item in items]

    # NOTE(review): string literals in this function are reproduced exactly as
    # found, including surrounding spaces, except where the padded form could
    # never work (file mode, single-character layout codes). The padding looks
    # like an extraction artifact -- verify against the config files.
    for operation in [' vector-axpy ', ' matrix-axpy ', ' row-wise-reduction ', ' matrix-product ']:
        if operation not in config:
            continue
        p = config[operation]

        # A preset name selects a predefined device list; anything else is one
        # device index or a list of them. The list check comes first because a
        # list is unhashable and cannot be tested against the presets dict, and
        # a scalar must not be iterated character by character ("10" != 1, 0).
        confdevices = p[' devices ']
        if not isinstance(confdevices, list) and confdevices in utils.DEVICES_PRESETS:
            devices = utils.DEVICES_PRESETS[confdevices]
        else:
            devices = [utils.all_devices[index] for index in as_list(int, confdevices)]

        precisions = as_list(str, p[' precision '])
        datatypes = [DATATYPES[k] for k in precisions]

        # Iterate through the datatypes and the devices
        for datatype, device in itertools.product(datatypes, devices):
            ctx = vcl.backend.Context(cl.Context([device]))
            device = ctx.current_device

            # Check data-type
            if datatype is vcl.float64 and not device.double_fp_config:
                sys.stderr.write(' Warning : The device ' + device.name + ' does not support double precision! Skipping ... ')
                continue

            # Helper: run the genetic optimizer on one statement, archiving to
            # `fname` (the null device unless a file name is given).
            def execute(statement, other_params, sizes, fname=os.devnull):
                spec = TYPES[operation]
                print(' ----- ')
                print(' '.join(map(str, (" Now tuning: ", datatype.__name__, ' - ', operation, ' - '.join(other_params), ' [ ' + device.name, ' ( ' + device.platform.name + ' )] for sizes ', sizes))))
                with open(fname, "w+") as archive:
                    return optimize.genetic(
                        statement,
                        ctx,
                        spec[' template '],
                        lambda params: spec[' template '](params, *other_params),
                        spec[' parameter-names '],
                        lambda t: spec[' perf-index ']([datatype().itemsize, sizes, t]),
                        spec[' perf-measure '],
                        archive)

            s = as_list(int, p[' size '])

            # Vector AXPY
            if operation == ' vector-axpy ':
                x = vcl.Vector(s[0], context=ctx, dtype=datatype)
                y = vcl.Vector(s[0], context=ctx, dtype=datatype)
                # `sizes` is a required argument of execute(); the perf-index
                # reads it, so the configured sizes are passed through.
                execute(vcl.ElementProd(vcl.exp(x + y), vcl.cos(x + y)), (), s)

            # Matrix AXPY
            if operation == ' matrix-axpy ':
                A = vcl.Matrix(s, context=ctx, dtype=datatype)
                B = vcl.Matrix(s, context=ctx, dtype=datatype)
                execute(A + B, (), s)

            # Row-wise reduction
            if operation == ' row-wise-reduction ':
                layouts = as_list(str, p[' layout '])
                if ' all ' in layouts:
                    layouts = [' N ', ' T ']
                for A_trans in layouts:
                    not_transposed = A_trans == ' N '
                    A = vcl.Matrix(s if not_transposed else s[::-1], context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
                    # NOTE(review): in the transposed case A.T appears to have
                    # s[1] columns while x has s[0] entries; this only agrees
                    # for square sizes -- confirm the intended shapes.
                    x = vcl.Vector(s[1] if not_transposed else s[0], context=ctx, dtype=datatype)
                    LHS = A if not_transposed else A.T
                    execute(LHS * x, (), s)

            # Matrix Product
            if operation == ' matrix-product ':
                layouts = as_list(str, p[' layout '])
                if ' all ' in layouts:
                    # Two-character codes: first character for A, second for B.
                    layouts = ['NN', 'NT', 'TN', 'TT']
                for layout in layouts:
                    def execution_handler(sizes, fname, parameters=None):
                        # Each flag is a single character, so it is compared
                        # against the bare 'N' (a padded literal never matches).
                        A_trans = layout[0]
                        B_trans = layout[1]
                        A_shape = (sizes[0], sizes[1]) if A_trans == 'N' else (sizes[1], sizes[0])
                        B_shape = (sizes[1], sizes[2]) if B_trans == 'N' else (sizes[2], sizes[1])
                        A = vcl.Matrix(A_shape, context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
                        B = vcl.Matrix(B_shape, context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
                        LHS = A if A_trans == 'N' else A.T
                        RHS = B if B_trans == 'N' else B.T
                        alpha = vcl.HostScalar(1.0, context=ctx, dtype=datatype)
                        beta = vcl.HostScalar(1.0, context=ctx, dtype=datatype)
                        C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
                        statement = vcl.Statement(vcl.Assign(C, LHS * RHS * alpha + C * beta))
                        if parameters:
                            # Explicit parameters: benchmark that single profile.
                            TemplateType = TYPES[operation][' template ']
                            return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters), A_trans, B_trans), statement, device)
                        else:
                            # No parameters: search for them.
                            execute(statement, (A_trans, B_trans), sizes, fname)

                    X, Y, profiles = generate_dataset(TYPES[operation][' template '], execution_handler)
                    train_model(X, Y, profiles)
2014-09-02 22:03:20 -04:00
def main():
    """Command-line entry point: list the OpenCL devices or run the auto-tuner."""
    # NOTE(review): literals are reproduced exactly as found, including
    # surrounding spaces, except `dest` and the option strings, which must be
    # unpadded for args.action / args.config / args.viennacl_root to exist.
    # The padding looks like an extraction artifact -- verify.
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='action')
    subparsers.add_parser(' list-devices ', help=' list the devices available ')
    tune_parser = subparsers.add_parser(' tune ', help=' tune using a specific configuration file ')
    tune_parser.add_argument("--config", default=" config.ini ", required=False, type=str)
    tune_parser.add_argument("--viennacl-root", default=' ', required=False, type=str)
    args = parser.parse_args()

    if args.action is None:
        # Sub-commands are optional on Python 3; without one there are no
        # tuning options to read, so report a usage error instead of crashing.
        parser.error('an action is required')

    if args.action == ' list-devices ':
        print(" ---------------- ")
        print(" Devices available: ")
        print(" ---------------- ")
        devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
        for i, d in enumerate(devices):
            print(' Device ', i, ' : ', utils.DEVICE_TYPE_PREFIX[d.type].upper() + ' : ', d.name, ' on ', d.platform.name)
        print(" ---------------- ")
    else:
        print(" ------ ")
        print(" Auto-tuning ")
        print(" ------ ")
        do_tuning(args.config, ' config_spec.ini ', args.viennacl_root)


if __name__ == "__main__":
    main()