Python/Autotune: Moved devices from config.ini to command line argument
python/autotune/external/config.ini (vendored), 45 lines changed
@@ -1,28 +1,23 @@
-viennacl-src-root = /home/philippe/Development/viennacl-dev/viennacl/
+#~ viennacl-src-root = /home/philippe/Development/viennacl-dev/viennacl/
 
 [vector-axpy]
-devices = 0
-precision = single, double
-size = 5000000
+precision = single
+#~ size = 5000000
 
-[reduction]
-devices = 0
-precision = single, double
-size = 5000000
-
-[matrix-axpy]
-devices = 0
-precision = single, double
-size = 2560, 2560
-
-[row-wise-reduction]
-devices = 0
-precision = single, double
-layout = N,T
-size = 2560, 2560
-
-[matrix-product]
-devices = 0
-precision = single, double
-layout = NN,NT,TN,TT
-size = 1536, 1536, 1536
+#~ [reduction]
+#~ precision = single, double
+#~ size = 5000000
+#~
+#~ [matrix-axpy]
+#~ precision = single, double
+#~ size = 2560, 2560
+#~
+#~ [row-wise-reduction]
+#~ precision = single, double
+#~ layout = N,T
+#~ size = 2560, 2560
+#~
+#~ [matrix-product]
+#~ precision = single, double
+#~ layout = NN,NT,TN,TT
+#~ size = 1536, 1536, 1536
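Note: ConfigObj parses a comma-separated value such as "precision = single, double" into a Python list, but a single value such as "precision = single" into a bare string. That asymmetry is why the tuner normalizes everything through map_to_list (shown verbatim in the next hunk); a minimal sketch against the file above:

import configobj  # from the configobj package, imported by the tuner

config = configobj.ConfigObj('config.ini')
p = config['vector-axpy']

def map_to_list(T, x):
    # Wrap scalars into a one-element list, then cast every element.
    return list(map(T, x if isinstance(x, list) else [x]))

precisions = map_to_list(str, p['precision'])  # always a list, e.g. ['single']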
@@ -1,18 +1,19 @@
 from __future__ import division
 
-import argparse, itertools, os, sys
+import argparse, itertools, os, sys, json
 import misc_tools, optimize
 
 import pyopencl as cl
 import pyviennacl as vcl
 import pyatidlas as atd
 
 import numpy as np
 
 from configobj import ConfigObj
 from numpy import random
 from dataset import generate_dataset
 from model import train_model
 
 
 DATATYPES = { 'single' : vcl.float32,
               'double' : vcl.float64 }
@@ -36,34 +37,34 @@ TYPES = { 'vector-axpy': {'template':atd.VectorAxpyTemplate,
           'perf-index': lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9,
           'perf-measure': 'GFLOP/s'} }
 
-def do_tuning(config_fname, viennacl_root):
+def do_tuning(config_fname, viennacl_root, device):
+    json_out = {}
     config = ConfigObj(config_fname)
 
     def map_to_list(T, x):
         return list(map(T, x if isinstance(x, list) else [x]))
 
     for operation in ['vector-axpy', 'matrix-axpy', 'reduction', 'row-wise-reduction', 'matrix-product']:
 
         if operation in config:
             p = config[operation]
-            confdevices = p['devices']
-            all_devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
-            DEVICES_PRESETS = {'all': all_devices,
-                               'gpus': [d for d in all_devices if d.type==cl.device_type.GPU],
-                               'cpus': [d for d in all_devices if d.type==cl.device_type.CPU],
-                               'accelerators': [d for d in all_devices if d.type==cl.device_type.ACCELERATOR]
-                               }
-            devices = DEVICES_PRESETS[confdevices] if confdevices in DEVICES_PRESETS else [all_devices[int(i)] for i in confdevices]
             precisions = map_to_list(str, p['precision'])
             if 'all' in precisions:
                 precisions = ['single','double']
             datatypes = [DATATYPES[k] for k in precisions]
-            #Iterate through the datatypes and the devices
-            for datatype, device in itertools.product(datatypes, devices):
+            #Iterate through the datatypes
+            for datatype in datatypes:
 
                 ctx = cl.Context([device])
                 ctx = vcl.backend.Context(ctx)
                 device = ctx.current_device
 
                 #Check data-type
                 if datatype is vcl.float64 and not device.double_fp_config:
                     sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
                     continue
 
                 #Helper for execution
                 def execute(device, node, other_params, sizes, fname = os.devnull, parameters = None):
                     with vcl.Statement(node) as statement:
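Note: the removed DEVICES_PRESETS logic can still be reproduced ad hoc when scripting outside the tuner. A minimal pyopencl sketch applying the same filters:

import pyopencl as cl

all_devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
# The same categories the old config-driven presets offered:
gpus = [d for d in all_devices if d.type == cl.device_type.GPU]
cpus = [d for d in all_devices if d.type == cl.device_type.CPU]
for i, d in enumerate(all_devices):
    print('Device %d | %s | %s' % (i, cl.device_type.to_string(d.type), d.name))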
@@ -75,6 +76,7 @@ def do_tuning(config_fname, viennacl_root):
                     with open(fname, "w+") as archive:
                         return optimize.genetic(statement, device, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
                                                 lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
 
                 #Helper for tuning
                 def tune(execution_handler, nTuning, nDataPoints, draw, additional_parameters):
                     if 'size' in p:
@@ -85,7 +87,20 @@ def do_tuning(config_fname, viennacl_root):
                     def compute_perf(x, t):
                         return TYPES[operation]['perf-index']([datatype().itemsize, x, t])
                     X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler, nTuning, nDataPoints, draw)
-                    train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
+                    clf = train_model(X, Y, profiles, TYPES[operation]['perf-measure'])
+
+                    #Update JSON
+                    full_operation = operation + ''.join(additional_parameters)
+                    if full_operation not in json_out:
+                        json_out[full_operation] = {}
+                    json_out[full_operation][datatype.__name__] = {}
+                    D = json_out[full_operation][datatype.__name__]
+                    D['profiles'] = [ prof.astype('int').tolist() for prof in profiles]
+                    D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
+                                       'children_right': e.tree_.children_right.tolist(),
+                                       'threshold': e.tree_.threshold.astype('float32').tolist(),
+                                       'feature': e.tree_.feature.astype('float32').tolist(),
+                                       'value': e.tree_.value[:,:,0].astype('float32').tolist()} for e in clf.estimators_]
 
                 #Vector AXPY
                 if operation=='vector-axpy':
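Note: each serialized estimator above is a flattened sklearn decision tree (children_left, children_right, threshold, feature, value), so a consumer can evaluate the model without sklearn. A hedged sketch; the predict_tree helper, the file name, and the 'vector-axpy'/'float32' keys are illustrative assumptions, not part of this commit:

import json

def predict_tree(tree, x):
    # Walk one serialized tree; sklearn marks leaves with children_left == -1.
    node = 0
    while tree['children_left'][node] != -1:
        f = int(tree['feature'][node])
        if x[f] <= tree['threshold'][node]:
            node = tree['children_left'][node]
        else:
            node = tree['children_right'][node]
    return tree['value'][node]  # per-profile scores stored at the leaf

model = json.load(open('geforce_gtx_470.json'))  # name produced by sanitize_string(device.name)
trees = model['vector-axpy']['float32']['predictor']
preds = [predict_tree(t, [10000.0]) for t in trees]
# Random-forest prediction = average over the estimators.
avg = [sum(vals) / len(preds) for vals in zip(*preds)]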
@@ -143,6 +158,10 @@ def do_tuning(config_fname, viennacl_root):
                         return execute(device, vcl.Assign(C,LHS*RHS*alpha + C*beta),(A_trans, B_trans), sizes, fname, parameters)
                     tune(execution_handler, 50, 2000, lambda : 64*np.random.randint(low=1, high=40, size=3),(layout[0], layout[1]))
 
+    dname = misc_tools.sanitize_string(device.name)
+    json_out["version"] = "1.0"
+    json.dump(json_out, open(dname + '.json','w'))
 
 
 if __name__ == "__main__":
@@ -151,14 +170,15 @@ if __name__ == "__main__":
     print_devices_parser = subparsers.add_parser('list-devices', help='list the devices available')
     tune_parser = subparsers.add_parser('tune', help='tune using a specific configuration file')
     tune_parser.add_argument("--config", default="config.ini", required=False, type=str)
+    tune_parser.add_argument("--device", default=0, required=False, type=int)
     tune_parser.add_argument("--viennacl-root", default='', required=False, type=str)
     args = parser.parse_args()
 
+    devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
     if(args.action=='list-devices'):
         print("----------------")
         print("Devices available:")
         print("----------------")
-        devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
         for (i, d) in enumerate(devices):
             print 'Device', i, '|', cl.device_type.to_string(d.type), '|', d.name, 'on', d.platform.name
         print("----------------")
@@ -166,4 +186,4 @@ if __name__ == "__main__":
         print("------")
         print("Auto-tuning")
         print("------")
-        do_tuning(args.config, args.viennacl_root)
+        do_tuning(args.config, args.viennacl_root, devices[args.device])
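Note: after this change the device is picked on the command line rather than in config.ini, so a typical session first enumerates devices and then tunes on one of them. The entry-point script name below is an assumption; it was not preserved in this view:

python autotune.py list-devices
python autotune.py tune --config config.ini --device 1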
dataset.py

@@ -15,48 +15,53 @@ def resample(X, draw):
 
 def generate_dataset(TemplateType, execution_handler, nTuning, nDataPoints, draw):
 
-    print "Getting some good profiles..."
-    nDim = draw().size
-    X = np.empty((nTuning, nDim))
-    t = np.empty(nTuning)
-    profiles = []
-    for i in range(nTuning):
-        x = resample(X, draw)
-        y = execution_handler(x)
-        if y not in profiles:
-            profiles.append(y)
-        idx = profiles.index(y)
-        X[i,:] = x
-        t[i] = idx
-
-    print "Generating the dataset..."
-    Y = np.empty((nDataPoints, len(profiles)))
-    X = np.empty((nDataPoints, nDim))
-    t = []
-
-    for i in range(nDataPoints):
-        x = resample(X, draw)
-        for j,y in enumerate(profiles):
-            T = execution_handler(x, os.devnull, y)
-            Y[i,j] = T
-        idx = np.argmax(Y[i,:])
-        X[i,:] = x
-        t = np.argmax(Y[:i+1,], axis=1)
-        if i%10==0:
-            sys.stdout.write('%d data points generated\r'%i)
-            sys.stdout.flush()
+    # print "Getting some good profiles..."
+    # nDim = draw().size
+    # X = np.empty((nTuning, nDim))
+    # t = np.empty(nTuning)
+    # profiles = []
+    # for i in range(nTuning):
+    #     x = resample(X, draw)
+    #     y = execution_handler(x)
+    #     if y not in profiles:
+    #         profiles.append(y)
+    #     idx = profiles.index(y)
+    #     X[i,:] = x
+    #     t[i] = idx
+    #
+    # print "Generating the dataset..."
+    # Y = np.empty((nDataPoints, len(profiles)))
+    # X = np.empty((nDataPoints, nDim))
+    # t = []
+    #
+    # for i in range(nDataPoints):
+    #     x = resample(X, draw)
+    #     for j,y in enumerate(profiles):
+    #         T = execution_handler(x, os.devnull, y)
+    #         Y[i,j] = T
+    #     idx = np.argmax(Y[i,:])
+    #     X[i,:] = x
+    #     t = np.argmax(Y[:i+1,], axis=1)
+    #     if i%10==0:
+    #         sys.stdout.write('%d data points generated\r'%i)
+    #         sys.stdout.flush()
 
     template_name = TemplateType.__name__
     dir = os.path.join("data", template_name)
     if not os.path.exists(dir):
         os.makedirs(dir)
 
-    np.savetxt(os.path.join(dir,"profiles.csv"), profiles)
-    np.savetxt(os.path.join(dir,"X.csv"), X)
-    np.savetxt(os.path.join(dir,"Y.csv"), Y)
+    # np.savetxt(os.path.join(dir,"profiles.csv"), profiles)
+    # np.savetxt(os.path.join(dir,"X.csv"), X)
+    # np.savetxt(os.path.join(dir,"Y.csv"), Y)
 
+    profiles = np.loadtxt(os.path.join(dir, "profiles.csv"))
+    X = np.loadtxt(os.path.join(dir, "X.csv"),ndmin=2)
+    Y = np.loadtxt(os.path.join(dir, "Y.csv"),ndmin=2)
+
+    #idx = np.argsort(np.bincount(np.argmin(Y, axis=1)))
+    idx = np.argsort(Y[np.argmax(X),:])
+    Y = Y[:, idx]
+    profiles = profiles[idx]
 
     return X, Y, profiles
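Note: the new code path reloads cached CSVs (ndmin=2 keeps a single-row file two-dimensional) and reorders the profile columns by their measured time at one reference input, keeping the labels in sync. A toy numpy illustration of that column reordering, with invented numbers:

import numpy as np

Y = np.array([[3.0, 1.0, 2.0],    # times for 3 profiles at input 0
              [6.0, 2.0, 4.0]])   # times at input 1
profiles = np.array([100, 200, 300])

row = 1                           # reference row, as picked by np.argmax(X) above
idx = np.argsort(Y[row, :])       # fastest profile at that input first: [1, 2, 0]
Y = Y[:, idx]
profiles = profiles[idx]
print(profiles)                   # [200 300 100]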
misc_tools.py

@@ -207,13 +207,13 @@ def benchmark(template, statement, device):
     return current_time/N
 
 
+def sanitize_string(string, keep_chars = ['_']):
+    string = string.replace(' ', '_').replace('-', '_').lower()
+    string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
+    return string
+
 def update_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters):
 
     def append_include(data, path):
         include_name = '#include "' + path +'"\n'
         already_included = data.find(include_name)
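Note: sanitize_string lowercases the device name, maps spaces and hyphens to underscores, and drops any remaining non-alphanumeric characters, which gives each device a stable JSON file name. The device names below are illustrative:

>>> sanitize_string('GeForce GTX 470')
'geforce_gtx_470'
>>> sanitize_string('Intel(R) Core(TM) i7-4770')
'intelr_coretm_i7_4770'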
model.py

@@ -1,7 +1,6 @@
 from sklearn import tree
 from sklearn import ensemble
-from numpy import array, bincount, mean, std, max, argmax, min, argmin, median
 
 import numpy as np
 
 def gmean(a, axis=0, dtype=None):
     if not isinstance(a, np.ndarray): # if not an ndarray object attempt to convert it
@@ -16,25 +15,30 @@ def gmean(a, axis=0, dtype=None):
     return np.exp(log_a.mean(axis=axis))
 
 def train_model(X, Y, profiles, metric):
     print("Building the model...")
 
     Xmean = mean(X)
     Xstd = std(X)
     X = (X - Xmean)/Xstd
 
-    Y=Y[:,:]
-    Ymax = max(Y)
-    profiles=profiles[:]
+    Ymax = np.max(Y)
     Y = Y/Ymax
 
-    ref = argmax(bincount(argmin(Y, axis=1))) #most common profile
-    cut = int(0.800*X.shape[0]+1)
-
-    #Train the model
-    clf = ensemble.RandomForestRegressor(10, max_depth=10).fit(X[:cut,:], Y[:cut,:])
+    cut = int(0.75*X.shape[0])
+    clf = ensemble.RandomForestRegressor(10, max_depth=4).fit(X[:cut,:], Y[:cut,:])
 
-    t = argmin(clf.predict(X[cut:,:]), axis = 1)
-    s = array([y[ref]/y[k] for y,k in zip(Y[cut:,:], t)])
-    tt = argmin(Y[cut:,:], axis = 1)
-    ss = array([y[ref]/y[k] for y,k in zip(Y[cut:,:], tt)])
-    print("Testing speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%(gmean(s), median(s), min(s), max(s)))
-    print("Optimal speedup : mean = %.3f, median = %.3f, min = %.3f, max %.3f"%(gmean(ss), median(ss), min(ss), max(ss)))
-    print clf.predict([10000])
+    t = np.argmin(clf.predict(X[cut:,:]), axis = 1)
+    s = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], t)])
+    tt = np.argmin(Y[cut:,:], axis = 1)
+    ss = np.array([y[0]/y[k] for y,k in zip(Y[cut:,:], tt)])
+
+    p5 = lambda a: np.percentile(a, 5)
+    p25 = lambda a: np.percentile(a, 25)
+    p50 = lambda a: np.percentile(a, 50)
+    p75 = lambda a: np.percentile(a, 75)
+    p95 = lambda a: np.percentile(a, 95)
+
+    print("Percentile :\t 5 \t 25 \t 50 \t 75 \t 95")
+    print("Testing speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(s), p25(s), p50(s), p75(s), p95(s)))
+    print("Optimal speedup:\t %.2f\t %.2f\t %.2f\t %.2f\t %.3f"%(p5(ss), p25(ss), p50(ss), p75(ss), p95(ss)))
 
     return clf
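Note: in the evaluation block, s measures the speedup of the model's chosen profile over profile 0, and ss the speedup of the true per-row optimum, so ss dominates s elementwise. A toy check of that arithmetic, with invented numbers:

import numpy as np

Y = np.array([[1.0, 0.5, 0.8],
              [1.0, 0.9, 0.4]])     # normalized times; column 0 is profile 0
t  = np.array([1, 1])               # profiles the model picks
tt = np.argmin(Y, axis=1)           # true best profiles per row: [1, 2]

s  = np.array([y[0]/y[k] for y, k in zip(Y, t)])   # [2.0, 1.111...]
ss = np.array([y[0]/y[k] for y, k in zip(Y, tt)])  # [2.0, 2.5]
assert np.all(ss >= s)              # the oracle is at least as good as the model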