2014-09-02 22:03:20 -04:00
from __future__ import division
import argparse
import itertools
import os
from external . configobj import ConfigObj
import pyopencl as cl
import pyviennacl as vcl
2014-10-04 08:58:11 +02:00
import numpy as np
2014-09-02 22:03:20 -04:00
from pyviennacl import backend
from pyviennacl import opencl
from pyviennacl import atidlas
2014-09-27 20:54:17 -04:00
from dataset import generate_dataset
2014-09-28 19:37:56 -04:00
from model import train_model
import tools
2014-09-02 22:03:20 -04:00
import utils
import vclio
import optimize
import sys
# Precision names accepted in the configuration file, mapped to the
# pyviennacl dtype used to allocate the benchmark operands.
DATATYPES = {
    ' single ': vcl.float32,
    ' double ': vcl.float64,
}
2014-09-02 22:03:20 -04:00
2014-09-06 00:39:38 -04:00
def _describe_operation(template, perf_index, perf_measure):
    """Bundle the three per-operation settings into one TYPES entry.

    template     -- atidlas template class for the operation
    perf_index   -- callable taking [itemsize, sizes, time] and returning the
                    performance figure
    perf_measure -- unit label for the value returned by perf_index
    """
    return {' template ': template,
            ' perf-index ': perf_index,
            ' perf-measure ': perf_measure}


# Per-operation tuning settings. Every perf-index callable receives a list
# [itemsize, sizes, time] (this is how do_tuning builds the argument).
TYPES = {
    ' vector-axpy ': _describe_operation(
        vcl.atidlas.VectorAxpyTemplate,
        lambda x: 3 * x[0] * x[1][0] / x[2] * 1e-9,
        ' GB/s '),

    ' matrix-axpy ': _describe_operation(
        vcl.atidlas.MatrixAxpyTemplate,
        lambda x: 3 * x[0] * x[1][0] * x[1][1] / x[2] * 1e-9,
        ' GB/s '),

    ' reduction ': _describe_operation(
        vcl.atidlas.ReductionTemplate,
        lambda x: 2 * x[0] * x[1][0] / x[2] * 1e-9,
        ' GB/s '),

    ' row-wise-reduction ': _describe_operation(
        vcl.atidlas.RowWiseReductionTemplate,
        lambda x: x[0] * x[1][0] * x[1][1] / x[2] * 1e-9,
        ' GB/s '),

    ' matrix-product ': _describe_operation(
        vcl.atidlas.MatrixProductTemplate,
        lambda x: 2 * x[1][0] * x[1][1] * x[1][2] / x[2] * 1e-9,
        ' GFLOP/s '),
}
2014-09-29 03:01:33 +02:00
def do_tuning(config_fname, spec_fname, viennacl_root):
    """Tune every operation that has a section in the configuration file.

    For each configured operation, each requested (datatype, device) pair is
    processed. When the section has a ' size ' entry, the genetic optimizer
    is run once for that size and, if the configuration has a
    ' viennacl-src-root ' entry, tools.update_viennacl_headers is called with
    the resulting profile. Without a ' size ' entry, a dataset is generated
    and passed to train_model.

    config_fname  -- configuration file handed to ConfigObj
    spec_fname    -- passed to ConfigObj as its configspec
    viennacl_root -- accepted for interface compatibility; this function
                     never reads it (the headers root comes from the
                     configuration file instead)
    """
    config = ConfigObj(config_fname, configspec=spec_fname)

    def map_to_list(T, x):
        """Apply T to each element of x; a non-list x is wrapped in a list first."""
        return list(map(T, x if isinstance(x, list) else [x]))

    for operation in [' vector-axpy ', ' matrix-axpy ', ' reduction ', ' row-wise-reduction ', ' matrix-product ']:
        if operation not in config:
            continue
        p = config[operation]
        confdevices = p[' devices ']
        # Either a named preset or an explicit list of device indices.
        if confdevices in utils.DEVICES_PRESETS:
            devices = utils.DEVICES_PRESETS[confdevices]
        else:
            devices = [utils.all_devices[int(i)] for i in confdevices]
        precisions = map_to_list(str, p[' precision '])
        if ' all ' in precisions:
            precisions = [' single ', ' double ']
        datatypes = [DATATYPES[k] for k in precisions]

        # Iterate through the datatypes and the devices
        for datatype, device in itertools.product(datatypes, devices):
            ctx = cl.Context([device])
            ctx = vcl.backend.Context(ctx)
            device = ctx.current_device

            # Skip devices that cannot run the requested precision
            if datatype is vcl.float64 and not device.double_fp_config:
                sys.stderr.write(' Warning : The device ' + device.name + ' does not support double precision! Skipping ... ')
                continue

            # Helper for execution
            def execute(device, node, other_params, sizes, fname=os.devnull, parameters=None):
                """Benchmark one given profile, or run the genetic optimizer.

                With `parameters`, a template is built from them and the
                result of tools.benchmark is returned. Otherwise the result
                of optimize.genetic is returned, with `fname` opened as the
                archive file.
                """
                if parameters:
                    TemplateType = TYPES[operation][' template ']
                    # The statement has to be built here: previously this
                    # branch used `statement` before any binding existed,
                    # which raised UnboundLocalError.
                    with vcl.Statement(node) as statement:
                        return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters), *other_params), statement, device)
                print(' ----- ')
                print(' '.join(map(str, (" Now tuning: ", datatype.__name__, ' - ', operation, ' - '.join(other_params), ' [ ' + device.name, ' ( ' + device.platform.name + ' )] for sizes ', sizes))))
                with open(fname, " w+ ") as archive:
                    with vcl.Statement(node) as statement:
                        return optimize.genetic(statement, device, TYPES[operation][' template '], lambda p: TYPES[operation][' template '](p, *other_params),
                                                lambda t: TYPES[operation][' perf-index ']([datatype().itemsize, sizes, t]), TYPES[operation][' perf-measure '], archive)

            # Helper for tuning
            def tune(execution_handler, nTuning, nDataPoints, draw, additional_parameters):
                """Tune for the configured size, or build a dataset and train a model.

                execution_handler     -- callable(sizes, fname, parameters) wrapping execute
                nTuning, nDataPoints  -- forwarded to generate_dataset
                draw                  -- zero-argument callable forwarded to generate_dataset
                additional_parameters -- forwarded to tools.update_viennacl_headers
                """
                if ' size ' in p:
                    profile = execution_handler(map_to_list(int, p[' size ']))
                    if ' viennacl-src-root ' in config:
                        tools.update_viennacl_headers(config[' viennacl-src-root '], device, datatype, operation, additional_parameters, profile)
                else:
                    def compute_perf(x, t):
                        return TYPES[operation][' perf-index ']([datatype().itemsize, x, t])
                    X, Y, profiles = generate_dataset(TYPES[operation][' template '], execution_handler, nTuning, nDataPoints, compute_perf, draw)
                    train_model(X, Y, profiles, TYPES[operation][' perf-measure '])

            # Vector AXPY
            if operation == ' vector-axpy ':
                def execution_handler(sizes, fname=os.devnull, parameters=None):
                    x = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
                    y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
                    z = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
                    return execute(device, vcl.Assign(z, vcl.ElementProd(vcl.exp(x + y), vcl.cos(x + y))), (), sizes, fname, parameters)
                tune(execution_handler, 50, 10000, lambda: 64 * np.random.randint(low=10, high=100000, size=1), ())

            # Reduction
            if operation == ' reduction ':
                def execution_handler(sizes, fname=os.devnull, parameters=None):
                    x = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
                    y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
                    s = vcl.Scalar(0, context=ctx, dtype=datatype)
                    return execute(device, vcl.Assign(s, vcl.Dot(x, y)), (), sizes, fname, parameters)
                tune(execution_handler, 50, 10000, lambda: 64 * np.random.randint(low=10, high=100000, size=1), ())

            # Matrix AXPY
            if operation == ' matrix-axpy ':
                def execution_handler(sizes, fname=os.devnull, parameters=None):
                    A = vcl.Matrix(sizes, context=ctx, dtype=datatype)
                    B = vcl.Matrix(sizes, context=ctx, dtype=datatype)
                    C = vcl.Matrix(sizes, context=ctx, dtype=datatype)
                    return execute(device, vcl.Assign(C, A + B), (), sizes, fname, parameters)
                tune(execution_handler, 50, 10000, lambda: 64 * np.random.randint(low=5, high=100, size=2), ())

            # Row-wise reduction
            if operation == ' row-wise-reduction ':
                layouts = map_to_list(str, p[' layout '])
                if ' all ' in layouts:
                    layouts = [' N ', ' T ']
                for A_trans in layouts:
                    # The handler is consumed by tune() within the same
                    # iteration, so closing over A_trans is safe here.
                    def execution_handler(sizes, fname=os.devnull, parameters=None):
                        A = vcl.Matrix(sizes if A_trans == ' N ' else sizes[::-1], context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
                        x = vcl.Vector(sizes[1] if A_trans == ' N ' else sizes[0], context=ctx, dtype=datatype)
                        y = vcl.Vector(sizes[0] if A_trans == ' N ' else sizes[1], context=ctx, dtype=datatype)
                        LHS = A if A_trans == ' N ' else A.T
                        # NOTE(review): execute() receives no layout parameter
                        # here although tune() gets (A_trans,) -- confirm
                        # this is intended.
                        return execute(device, vcl.Assign(y, LHS * x), (), sizes, fname, parameters)
                    tune(execution_handler, 50, 10000, lambda: 64 * np.random.randint(low=5, high=100, size=2), (A_trans,))

            # Matrix Product
            if operation == ' matrix-product ':
                layouts = map_to_list(str, p[' layout '])
                if ' all ' in layouts:
                    layouts = [' NN ', ' NT ', ' TN ', ' TT ']
                for layout in layouts:
                    def execution_handler(sizes, fname=os.devnull, parameters=None):
                        A_trans = layout[0]
                        B_trans = layout[1]
                        A = vcl.Matrix((sizes[0], sizes[1]) if A_trans == ' N ' else (sizes[1], sizes[0]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
                        B = vcl.Matrix((sizes[1], sizes[2]) if B_trans == ' N ' else (sizes[2], sizes[1]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
                        LHS = A if A_trans == ' N ' else A.T
                        RHS = B if B_trans == ' N ' else B.T
                        alpha = vcl.HostScalar(1.0, context=ctx, dtype=datatype)
                        beta = vcl.HostScalar(1.0, context=ctx, dtype=datatype)
                        C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
                        return execute(device, vcl.Assign(C, LHS * RHS * alpha + C * beta), (A_trans, B_trans), sizes, fname, parameters)
                    tune(execution_handler, 50, 10000, lambda: 64 * np.random.randint(low=1, high=40, size=3), (layout[0], layout[1]))
2014-09-29 03:01:33 +02:00
2014-09-02 22:03:20 -04:00
if __name__ == " __main__ ":
    # Command-line front end with two sub-commands: one lists the OpenCL
    # devices, the other runs the auto-tuner on a configuration file.
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest=' action ')
    subparsers.add_parser(' list-devices ', help=' list the devices available ')
    tune_parser = subparsers.add_parser(' tune ', help=' tune using a specific configuration file ')
    tune_parser.add_argument(" --config ", default=" config.ini ", required=False, type=str)
    tune_parser.add_argument(" --viennacl-root ", default=' ', required=False, type=str)
    args = parser.parse_args()

    if args.action != ' list-devices ':
        # Any action other than listing devices starts a tuning run.
        print(" ------ ")
        print(" Auto-tuning ")
        print(" ------ ")
        do_tuning(args.config, ' config_spec.ini ', args.viennacl_root)
    else:
        separator = " ---------------- "
        print(separator)
        print(" Devices available: ")
        print(separator)
        # Flatten the per-platform device lists, keeping platform order.
        devices = list(itertools.chain.from_iterable(
            platform.get_devices() for platform in cl.get_platforms()))
        for (i, d) in enumerate(devices):
            print(' Device ', i, ' : ', utils.DEVICE_TYPE_PREFIX[d.type].upper() + ' : ', d.name, ' on ', d.platform.name)
        print(separator)