Now compiling ATIDLAS

2014-10-14 23:49:18 -04:00
parent f60adab3dc
commit f91d3b422a
12 changed files with 321 additions and 2501 deletions
--- a/autotune/python/external/configobj.py
+++ b/autotune/python/external/configobj.py
--- a/autotune/python/external/init.py
+++ b/autotune/python/external/init.py
--- a/python/atidlas/atidlas.py
+++ b/python/atidlas/atidlas.py
@@ -0,0 +1,113 @@
+import abc, logging
+from . import _viennacl as _v
+from .pycore import Node, Statement
+
+class OrderType(object):
+    """Base class for the statement-order markers defined in this module.
+
+    The subclasses only carry a ``vcl_order`` class attribute and are used as
+    classes (``StatementsTuple`` reads ``order.vcl_order``); creating an
+    instance always raises ``TypeError``.
+    """
+
+    def __init__(self, *args, **kwargs):
+        # Accept any call signature so that every instantiation attempt,
+        # including one with keyword arguments, fails with the same message.
+        raise TypeError("This class is not supposed to be instantiated")
+
+class SequentialOrder(OrderType):
+    """Order marker whose ``vcl_order`` is the backend ``SEQUENTIAL`` value."""
+    vcl_order = _v.statements_tuple_order_type.SEQUENTIAL
+
+class IndependentOrder(OrderType):
+    """Order marker whose ``vcl_order`` is the backend ``INDEPENDENT`` value."""
+    vcl_order = _v.statements_tuple_order_type.INDEPENDENT
+
+
+class StatementsTuple(object):
+    """Bundle one or more statements together with an execution order.
+
+    The backend object built by ``__init__`` is stored in ``vcl_tuple``.
+    """
+
+    # Kept for backward compatibility; nothing in this module assigns it.
+    vcl_statements_tuple = None
+    # Class-level default for the attribute that ``__init__`` populates.
+    vcl_tuple = None
+
+    def __init__(self, statements, order = SequentialOrder):
+        """Build the backend statements tuple.
+
+        :param statements: a single statement or ``Node``, or a list or tuple
+            of them.  ``Node`` objects are wrapped in a ``Statement`` first;
+            anything else must expose a ``vcl_statement`` attribute.
+        :param order: an ``OrderType`` subclass (the class itself) whose
+            ``vcl_order`` is forwarded to the backend.
+        """
+        # A tuple used to be wrapped as if it were one statement and then
+        # failed on the ``vcl_statement`` lookup; treat it like a list.
+        if not isinstance(statements, (list, tuple)):
+            statements = [statements]
+        vcl_statements = [
+            Statement(s).vcl_statement if isinstance(s, Node) else s.vcl_statement
+            for s in statements
+        ]
+        self.order = order
+        self.vcl_tuple = _v.statements_tuple(vcl_statements, order.vcl_order)
+
+# Alias for the extension module's ``fetching_policy_type``.
+FetchingPolicy = _v.fetching_policy_type
+
+class TemplateBase(object):
+    """Common Python-side wrapper around a backend template object.
+
+    Every method delegates to ``self._vcl_template``.  This base class never
+    assigns that attribute; the subclasses in this module set it in their
+    ``__init__``.
+    """
+
+    Parameters = _v.template_base.parameters_type
+
+    def __init__(self):
+        """Do nothing; subclasses create ``_vcl_template`` themselves."""
+
+    @property
+    def parameters(self):
+        """Return whatever the backend template's ``parameters()`` returns."""
+        backend_template = self._vcl_template
+        return backend_template.parameters()
+
+    def lmem_usage(self, statements):
+        """Forward ``statements.vcl_tuple`` to the backend ``lmem_usage``."""
+        query = self._vcl_template.lmem_usage
+        return query(statements.vcl_tuple)
+
+    def registers_usage(self, statements):
+        """Forward ``statements.vcl_tuple`` to the backend ``registers_usage``."""
+        query = self._vcl_template.registers_usage
+        return query(statements.vcl_tuple)
+
+    def check(self, statement):
+        """Return ``check_template`` of the statement for this template.
+
+        The context passed along is ``statement.result.context.vcl_sub_context``.
+        """
+        backend_statement = statement.vcl_statement
+        sub_context = statement.result.context.vcl_sub_context
+        return backend_statement.check_template(self._vcl_template, sub_context)
+
+    def execute(self, statement, force_compilation=False):
+        """Run the statement through this template and return its result.
+
+        :param statement: object exposing ``vcl_statement`` and ``result``.
+        :param force_compilation: forwarded unchanged to ``execute_template``.
+        :returns: ``statement.result``.
+        """
+        backend_statement = statement.vcl_statement
+        sub_context = statement.result.context.vcl_sub_context
+        backend_statement.execute_template(
+            self._vcl_template, sub_context, force_compilation)
+        return statement.result
+
+
+class VectorAxpyTemplate(TemplateBase):
+    """Python wrapper for ``_v.vector_axpy_template``."""
+
+    Parameters = _v.vector_axpy_template.parameters_type
+
+    def __init__(self, parameters):
+        """Create the backend template.
+
+        :param parameters: forwarded unchanged to ``_v.vector_axpy_template``.
+        """
+        super(VectorAxpyTemplate, self).__init__()
+        make_template = _v.vector_axpy_template
+        self._vcl_template = make_template(parameters)
+
+
+class MatrixAxpyTemplate(TemplateBase):
+    """Python wrapper for ``_v.matrix_axpy_template``."""
+
+    Parameters = _v.matrix_axpy_template.parameters_type
+
+    def __init__(self, parameters):
+        """Create the backend template.
+
+        :param parameters: forwarded unchanged to ``_v.matrix_axpy_template``.
+        """
+        super(MatrixAxpyTemplate, self).__init__()
+        make_template = _v.matrix_axpy_template
+        self._vcl_template = make_template(parameters)
+
+
+class ReductionTemplate(TemplateBase):
+    """Python wrapper for ``_v.reduction_template``."""
+
+    Parameters = _v.reduction_template.parameters_type
+
+    def __init__(self, parameters):
+        """Create the backend template.
+
+        :param parameters: forwarded unchanged to ``_v.reduction_template``.
+        """
+        super(ReductionTemplate, self).__init__()
+        make_template = _v.reduction_template
+        self._vcl_template = make_template(parameters)
+
+class RowWiseReductionTemplate(TemplateBase):
+    """Python wrapper for ``_v.row_wise_reduction_template``."""
+
+    Parameters = _v.row_wise_reduction_template.parameters_type
+
+    def __init__(self, parameters):
+        """Create the backend template.
+
+        :param parameters: forwarded unchanged to
+            ``_v.row_wise_reduction_template``.
+        """
+        super(RowWiseReductionTemplate, self).__init__()
+        make_template = _v.row_wise_reduction_template
+        self._vcl_template = make_template(parameters)
+
+
+class MatrixProductTemplate(TemplateBase):
+    """Python wrapper for ``_v.matrix_product_template``.
+
+    Besides the backend template, the instance keeps the ``A_trans`` and
+    ``B_trans`` constructor arguments and exposes them as read-only
+    properties.
+    """
+
+    Parameters = _v.matrix_product_template.parameters_type
+
+    def __init__(self, parameters, A_trans, B_trans):
+        """Create the backend template.
+
+        All three arguments are forwarded unchanged to
+        ``_v.matrix_product_template``.
+        """
+        super(MatrixProductTemplate, self).__init__()
+        self._A_trans, self._B_trans = A_trans, B_trans
+        make_template = _v.matrix_product_template
+        self._vcl_template = make_template(parameters, A_trans, B_trans)
+
+    @property
+    def A_trans(self):
+        """The ``A_trans`` value given at construction."""
+        return self._A_trans
+
+    @property
+    def B_trans(self):
+        """The ``B_trans`` value given at construction."""
+        return self._B_trans
--- a/python/autotune/autotune.py
+++ b/python/autotune/autotune.py
@@ -4,14 +4,12 @@ import argparse
 import itertools
 import os

-from external.configobj import ConfigObj
+from configobj import ConfigObj
+from numpy import random

 import pyopencl as cl
 import pyviennacl as vcl
-import numpy as np
-from pyviennacl import backend
-from pyviennacl import opencl
-from pyviennacl import atidlas
+from pyviennacl import backend, opencl, atidlas
 from dataset import generate_dataset
 from model import train_model
 import tools
--- a/python/autotune/dataset.py
+++ b/python/autotune/dataset.py
--- a/python/autotune/genetic.py
+++ b/python/autotune/genetic.py
--- a/python/autotune/model.py
+++ b/python/autotune/model.py
@@ -1,9 +1,8 @@
-from sklearn import *
 from sklearn import tree
 from sklearn import ensemble

-import numpy as np
-import scipy as sp
+from numpy import array, bincount, mean, std, max, argmax, min, argmin, median
+from scipy.stats import gmean


 # def random_forest(Xtr, Ytr):
@@ -62,24 +61,23 @@ import scipy as sp
 def train_model(X, Y, profiles, metric):
    print("Building the model...")

-    Xmean = np.mean(X)
-    Xstd = np.std(X)
+    Xmean = mean(X)
+    Xstd = std(X)
    X = (X - Xmean)/Xstd

    Y = Y[:, :]
-    Ymax = np.max(Y)
+    Ymax = max(Y)
    Y = Y/Ymax

-    ref = np.argmax(np.bincount(np.argmin(Y, axis=1))) #most common profile
+    ref = argmax(bincount(argmin(Y, axis=1))) #most common profile
    cut = int(0.800*X.shape[0]+1)

    #Train the model
    clf = ensemble.RandomForestRegressor(10, max_depth=10).fit(X[:cut,:], Y[:cut,:])

-    t = np.argmin(clf.predict(X[cut:,:]), axis = 1)
-    s = np.array([y[ref]/y[k] for y,k in zip(Y[cut:,:], t)])
-    # s = np.maximum(s, 1.0)
-    tt = np.argmin(Y[cut:,:], axis = 1)
-    ss = np.array([y[ref]/y[k] for y,k in zip(Y[cut:,:], tt)])
-    print("Testing speedup : mean = %.3f, median = %.3f, min = %.3f,  max %.3f"%(sp.stats.gmean(s), np.median(s), np.min(s), np.max(s)))
-    print("Optimal speedup : mean = %.3f, median = %.3f, min = %.3f,  max %.3f"%(sp.stats.gmean(ss), np.median(ss), np.min(ss), np.max(ss)))
+    t = argmin(clf.predict(X[cut:,:]), axis = 1)
+    s = array([y[ref]/y[k] for y,k in zip(Y[cut:,:], t)])
+    tt = argmin(Y[cut:,:], axis = 1)
+    ss = array([y[ref]/y[k] for y,k in zip(Y[cut:,:], tt)])
+    print("Testing speedup : mean = %.3f, median = %.3f, min = %.3f,  max %.3f"%(gmean(s), median(s), min(s), max(s)))
+    print("Optimal speedup : mean = %.3f, median = %.3f, min = %.3f,  max %.3f"%(gmean(ss), median(ss), min(ss), max(ss)))
--- a/python/autotune/optimize.py
+++ b/python/autotune/optimize.py
--- a/python/autotune/tools.py
+++ b/python/autotune/tools.py
--- a/autotune/config.ini
+++ b/autotune/config.ini
--- a/python/setup.py
+++ b/python/setup.py
@@ -0,0 +1,91 @@
+import os
+from distutils.command.build_ext import build_ext
+from setuptools import Extension, setup
+from distutils.sysconfig import get_python_inc
+from distutils import sysconfig
+
+# Per-compiler overrides, keyed by the distutils compiler type string and
+# consulted by build_ext_subclass.build_extensions().  All start out empty.
+platform_cflags = {}
+platform_ldflags = {}
+platform_libs = {}
+class build_ext_subclass(build_ext):
+    """``build_ext`` variant applying per-compiler flags and libraries.
+
+    Adapted from
+    https://stackoverflow.com/questions/724664
+
+    The module-level ``platform_cflags``, ``platform_ldflags`` and
+    ``platform_libs`` dictionaries are looked up with
+    ``self.compiler.compiler_type``.
+    """
+    def build_extensions(self):
+        """Patch every extension for the active compiler, then build them."""
+        compiler_type = self.compiler.compiler_type
+        if compiler_type in platform_cflags:
+            for ext in self.extensions:
+                # Copy so extensions never share (and mutate) the global list.
+                ext.extra_compile_args = list(platform_cflags[compiler_type])
+        if compiler_type in platform_ldflags:
+            for ext in self.extensions:
+                ext.extra_link_args = list(platform_ldflags[compiler_type])
+        if compiler_type in platform_libs:
+            for ext in self.extensions:
+                try:
+                    ext.libraries += platform_libs[compiler_type]
+                except (AttributeError, TypeError):
+                    # ``libraries`` is missing or cannot be extended in place
+                    # (e.g. it is None): start from the platform list instead.
+                    ext.libraries = list(platform_libs[compiler_type])
+        build_ext.build_extensions(self)
+
+def main():
+    """Adjust the default compiler options and run the setuptools build."""
+
+    def remove_prefixes(optlist, bad_prefixes):
+        """Drop, for each prefix, the first flag of ``optlist`` starting with it.
+
+        ``optlist`` is modified in place and also returned.
+        """
+        for bad_prefix in bad_prefixes:
+            for i, flag in enumerate(optlist):
+                if flag.startswith(bad_prefix):
+                    # Mutating while iterating is safe here: we stop at once.
+                    optlist.pop(i)
+                    break
+        return optlist
+
+    cvars = sysconfig.get_config_vars()
+    # 'OPT' is not defined by every build of Python (it is absent on
+    # Windows), so guard the lookup instead of failing with KeyError.
+    opt = cvars.get('OPT')
+    if opt:
+        cvars['OPT'] = ' '.join(remove_prefixes(opt.split(), ['-g', '-O', '-Wstrict-prototypes', '-DNDEBUG']))
+
+    DEFINES = [('VIENNACL_WITH_OPENCL','1')]
+    # The parent of the directory containing this file is the include root.
+    INCLUDE_DIRS = [os.path.dirname(os.path.dirname(os.path.abspath(__file__)))]
+
+    setup(
+        name="atidlas",
+        # The version must be a string.  distutils already reported "0.0.0"
+        # for the previous falsy value, so the effective metadata is unchanged.
+        # TODO: replace with the real release number.
+        version="0.0.0",
+        description="Auto-tuned input-dependent linear algebra subroutines",
+        author='Philippe Tillet',
+        author_email='ptillet@g.harvard.edu',
+        classifiers=[
+            'Environment :: Console',
+            'Development Status :: 5 - Production/Stable',
+            'Intended Audience :: Developers',
+            'Intended Audience :: Other Audience',
+            'Intended Audience :: Science/Research',
+            'License :: OSI Approved :: MIT License',
+            'Natural Language :: English',
+            'Programming Language :: C++',
+            'Programming Language :: Python',
+            'Programming Language :: Python :: 2',
+            'Programming Language :: Python :: 2.6',
+            'Programming Language :: Python :: 2.7',
+            'Programming Language :: Python :: 3',
+            'Programming Language :: Python :: 3.2',
+            'Programming Language :: Python :: 3.3',
+            'Programming Language :: Python :: 3.4',
+            'Topic :: Scientific/Engineering',
+            'Topic :: Scientific/Engineering :: Mathematics',
+            'Topic :: Scientific/Engineering :: Physics',
+            'Topic :: Scientific/Engineering :: Machine Learning',
+        ],
+
+        packages=["atidlas"],
+        ext_package="atidlas",
+        ext_modules=[Extension(
+            '_atidlas',[os.path.join("src", "_atidlas.cpp")],
+            extra_compile_args= [],
+            extra_link_args=[],
+            define_macros=DEFINES,
+            undef_macros=[],
+            include_dirs=INCLUDE_DIRS,
+            library_dirs=[],
+            libraries=['OpenCL']
+        )],
+        cmdclass={'build_ext': build_ext_subclass}
+    )
+
+# Run the build only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
--- a/python/src/_atidlas.cpp
+++ b/python/src/_atidlas.cpp
@@ -0,0 +1,102 @@
+#include <list>
+#include <boost/python.hpp>
+
+#include "viennacl/scheduler/forwards.h"
+#include "viennacl/tools/shared_ptr.hpp"
+
+#include "atidlas/templates/vector_axpy.hpp"
+#include "atidlas/templates/matrix_axpy.hpp"
+#include "atidlas/templates/reduction.hpp"
+#include "atidlas/templates/row_wise_reduction.hpp"
+#include "atidlas/templates/matrix_product.hpp"
+
+#define ENUM_VALUE(NS, V) .value( #V, NS :: V )
+
+namespace bp = boost::python;
+namespace vcl = viennacl;
+
+// Registers with Boost.Python: the two atidlas enums, template_base, and the
+// five *_template classes, each with a nested "parameters_type" class.
+void export_atidlas()
+{
+
+  // Python-visible enum "fetching_policy_type" with its three values.
+  bp::enum_<atidlas::fetching_policy_type>
+      ("fetching_policy_type")
+      ENUM_VALUE(atidlas, FETCH_FROM_LOCAL)
+      ENUM_VALUE(atidlas, FETCH_FROM_GLOBAL_CONTIGUOUS)
+      ENUM_VALUE(atidlas, FETCH_FROM_GLOBAL_STRIDED)
+      ;
+      
+  //Base
+  // template_base is exported without a Python constructor (bp::no_init).
+  // While the bp::scope 'outer' is alive, the parameters_type class below is
+  // registered inside it, exposing three read-only members.
+  {
+    #define __PROP(name) .def_readonly(#name, &atidlas::template_base::parameters_type::name)
+    bp::scope outer = bp::class_<atidlas::template_base, boost::noncopyable>("template_base", bp::no_init)
+            .def("lmem_usage", &atidlas::template_base::lmem_usage)
+            .def("registers_usage", &atidlas::template_base::registers_usage);
+    bp::class_<atidlas::template_base::parameters_type>("parameters_type", bp::no_init)
+              __PROP(simd_width)
+              __PROP(local_size_0)
+              __PROP(local_size_1);
+
+    #undef __PROP
+  }
+  
+  // WRAP_TEMPLATE(name, extra constructor argument types...):
+  //   1. exports template_base_impl<name, name::parameters_type> as
+  //      "<name>_base_impl" with a parameters() accessor;
+  //   2. exports atidlas::name itself, constructible from its parameters_type
+  //      plus the extra argument types, and binds it to a bp::scope named
+  //      'outer'.  Because it declares 'outer', each use needs its own { }.
+  #define WRAP_TEMPLATE(name, ...) bp::class_<atidlas::template_base_impl<atidlas::name, atidlas::name::parameters_type>, bp::bases<atidlas::template_base>, boost::noncopyable>(#name "_base_impl", bp::no_init)\
+                                  .def("parameters", bp::make_function(&atidlas::name::parameters, bp::return_internal_reference<>()));\
+                                  bp::scope outer = bp::class_<atidlas::name, bp::bases<atidlas::template_base_impl<atidlas::name, atidlas::name::parameters_type> > >(#name, bp::init<atidlas::name::parameters_type, ## __VA_ARGS__>())
+  
+  // WRAP_PARAMETERS(name, constructor argument types...): exports
+  // atidlas::name::parameters_type as "parameters_type", derived from
+  // template_base::parameters_type.
+  #define WRAP_PARAMETERS(name, ...) bp::class_<atidlas::name::parameters_type, bp::bases<atidlas::template_base::parameters_type> >("parameters_type", bp::init< __VA_ARGS__ >())
+  // __PROP_BASE(name, tpname): read-only attribute for one member of
+  // atidlas::tpname::parameters_type.
+  #define __PROP_BASE(name, tpname) .def_readonly(#name, &atidlas::tpname::parameters_type::name)
+  //Vector AXPY
+  { 
+    #define __PROP(name) __PROP_BASE(name, vector_axpy_template)
+    WRAP_TEMPLATE(vector_axpy_template);
+    WRAP_PARAMETERS(vector_axpy_template, uint, uint, uint, atidlas::fetching_policy_type)
+        __PROP(num_groups) __PROP(fetching_policy);
+    #undef __PROP
+  }
+  
+  //Matrix AXPY
+  { 
+    #define __PROP(name) __PROP_BASE(name, matrix_axpy_template)
+    WRAP_TEMPLATE(matrix_axpy_template);
+    WRAP_PARAMETERS(matrix_axpy_template, uint, uint, uint, uint, uint, atidlas::fetching_policy_type)
+        __PROP(num_groups_0) __PROP(num_groups_1)  __PROP(fetching_policy);
+    #undef __PROP
+  }
+  
+  //Reduction
+  { 
+    #define __PROP(name) __PROP_BASE(name, reduction_template)
+    WRAP_TEMPLATE(reduction_template);
+    WRAP_PARAMETERS(reduction_template, uint, uint, uint, atidlas::fetching_policy_type)
+        __PROP(num_groups)  __PROP(fetching_policy);
+    #undef __PROP
+  }
+  
+  //Row-wise reduction
+  // NOTE(review): only num_groups_0 is exposed here although the constructor
+  // signature lists five arguments; confirm whether more members should be
+  // readable from Python.
+  { 
+    #define __PROP(name) __PROP_BASE(name, row_wise_reduction_template)
+    WRAP_TEMPLATE(row_wise_reduction_template);
+    WRAP_PARAMETERS(row_wise_reduction_template, uint, uint, uint, uint, atidlas::fetching_policy_type)
+        __PROP(num_groups_0);
+    #undef __PROP
+  }
+  
+  //Matrix product
+  // The template constructor takes two extra char arguments after the
+  // parameters object.
+  { 
+    #define __PROP(name) __PROP_BASE(name, matrix_product_template)
+    WRAP_TEMPLATE(matrix_product_template, char, char);
+    bp::scope b = WRAP_PARAMETERS(matrix_product_template, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
+        __PROP(kL) __PROP(mS) __PROP(kS) __PROP(nS)
+        __PROP(A_fetching_policy) __PROP(B_fetching_policy)
+        __PROP(local_fetch_0) __PROP(local_fetch_1)
+        __PROP(mL) __PROP(nL);
+    
+    #undef __PROP
+  }
+ 
+  // Python-visible enum "statements_tuple_order_type" with two values.
+  bp::enum_<atidlas::statements_container::order_type>
+    ("statements_tuple_order_type")
+    ENUM_VALUE(atidlas::statements_container, SEQUENTIAL)
+    ENUM_VALUE(atidlas::statements_container, INDEPENDENT)
+    ;
+}