Tune: Further file hierarchy improvements
python/isaac/external/__init__.py (new vendored file, 0 lines)
python/isaac/external/sklearn/__init__.py (new vendored file, 12 lines)
@@ -0,0 +1,12 @@
"""
The :mod:`sklearn.tree` module includes decision tree-based models for
classification and regression.
"""

from .tree import DecisionTreeClassifier
from .tree import DecisionTreeRegressor
from .tree import ExtraTreeClassifier
from .tree import ExtraTreeRegressor

__all__ = ["DecisionTreeClassifier", "DecisionTreeRegressor",
           "ExtraTreeClassifier", "ExtraTreeRegressor"]
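Once the compiled `_tree` extension is in place, the re-exports above make the vendored estimators importable at the package level. A minimal sketch (the `isaac.external.sklearn` import path is inferred from the file paths in this commit, not stated elsewhere):

    # Hypothetical usage of the vendored package-level re-exports.
    from isaac.external.sklearn import DecisionTreeRegressor

    reg = DecisionTreeRegressor(max_depth=3, random_state=0)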
python/isaac/external/sklearn/forest.py (new vendored file, 1664 lines)

Diff suppressed because the file is too large.
python/isaac/external/sklearn/setup.py (new vendored file, 22 lines)
@@ -0,0 +1,22 @@
import os

import numpy
from numpy.distutils.misc_util import Configuration


def configuration(parent_package="", top_path=None):
    config = Configuration("tree", parent_package, top_path)
    libraries = []
    if os.name == 'posix':
        libraries.append('m')
    config.add_extension("_tree",
                         sources=["_tree.c"],
                         include_dirs=[numpy.get_include()],
                         libraries=libraries,
                         extra_compile_args=["-O3"])
    return config


if __name__ == "__main__":
    from numpy.distutils.core import setup
    setup(**configuration().todict())
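For completeness, this file is consumed through numpy.distutils rather than plain distutils; building the extension is the usual `python setup.py build_ext --inplace` run from this directory. A minimal sketch of inspecting the extension declaration without compiling it (assuming `_tree.c` has already been generated next to this file):

    # Sketch only: mirrors configuration() above to show what gets registered.
    import numpy
    from numpy.distutils.misc_util import Configuration

    config = Configuration("tree", "", None)
    config.add_extension("_tree", sources=["_tree.c"],
                         include_dirs=[numpy.get_include()])
    print(sorted(config.todict()))  # 'ext_modules' now lists the _tree extension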
python/isaac/external/sklearn/tree.py (new vendored file, 835 lines)
@@ -0,0 +1,835 @@
"""
This module gathers tree-based methods, including decision, regression and
randomized trees. Single and multi-output problems are both handled.
"""

# Authors: Gilles Louppe <g.louppe@gmail.com>
#          Peter Prettenhofer <peter.prettenhofer@gmail.com>
#          Brian Holt <bdholt1@gmail.com>
#          Noel Dawe <noel@dawe.me>
#          Satrajit Gosh <satrajit.ghosh@gmail.com>
#          Joly Arnaud <arnaud.v.joly@gmail.com>
#          Fares Hedayati <fares.hedayati@gmail.com>
#
# Licence: BSD 3 clause

# MODIFICATIONS:
#  - Removed base classes
#  - Incorporated required functions from six, utils

from __future__ import division

import numbers
from abc import abstractmethod

import numpy as np

from .utils import (NotFittedError, check_is_fitted, compute_sample_weight,
                    check_array, check_random_state, ClassifierMixin,
                    RegressorMixin, string_types)
from .utils import BaseEstimator

from ._tree import Criterion
from ._tree import Splitter
from ._tree import DepthFirstTreeBuilder, BestFirstTreeBuilder
from ._tree import Tree
from . import _tree

__all__ = ["DecisionTreeClassifier", "DecisionTreeRegressor",
           "ExtraTreeClassifier", "ExtraTreeRegressor"]


# =============================================================================
# Types and constants
# =============================================================================

DTYPE = _tree.DTYPE
DOUBLE = _tree.DOUBLE

CRITERIA_CLF = {"gini": _tree.Gini, "entropy": _tree.Entropy}
CRITERIA_REG = {"mse": _tree.MSE, "friedman_mse": _tree.FriedmanMSE}

DENSE_SPLITTERS = {"best": _tree.BestSplitter,
                   "presort-best": _tree.PresortBestSplitter,
                   "random": _tree.RandomSplitter}

SPARSE_SPLITTERS = {"best": _tree.BestSparseSplitter,
                    "random": _tree.RandomSparseSplitter}

# =============================================================================
# Base decision tree
# =============================================================================

class BaseDecisionTree(BaseEstimator):
    """Base class for decision trees.

    Warning: This class should not be used directly.
    Use derived classes instead.
    """

    @abstractmethod
    def __init__(self,
                 criterion,
                 splitter,
                 max_depth,
                 min_samples_split,
                 min_samples_leaf,
                 min_weight_fraction_leaf,
                 max_features,
                 max_leaf_nodes,
                 random_state,
                 class_weight=None):
        self.criterion = criterion
        self.splitter = splitter
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_features = max_features
        self.random_state = random_state
        self.max_leaf_nodes = max_leaf_nodes
        self.class_weight = class_weight

        self.n_features_ = None
        self.n_outputs_ = None
        self.classes_ = None
        self.n_classes_ = None

        self.tree_ = None
        self.max_features_ = None

    def fit(self, X, y, sample_weight=None, check_input=True):
        """Build a decision tree from the training set (X, y).

        Parameters
        ----------
        X : array-like or sparse matrix, shape = [n_samples, n_features]
            The training input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csc_matrix``.

        y : array-like, shape = [n_samples] or [n_samples, n_outputs]
            The target values (class labels in classification, real numbers in
            regression). In the regression case, use ``dtype=np.float64`` and
            ``order='C'`` for maximum efficiency.

        sample_weight : array-like, shape = [n_samples] or None
            Sample weights. If None, then samples are equally weighted. Splits
            that would create child nodes with net zero or negative weight are
            ignored while searching for a split in each node. In the case of
            classification, splits are also ignored if they would result in any
            single class carrying a negative weight in either child node.

        check_input : boolean, (default=True)
            Allows bypassing several input checks.
            Don't use this parameter unless you know what you are doing.

        Returns
        -------
        self : object
            Returns self.
        """
        random_state = check_random_state(self.random_state)
        if check_input:
            X = check_array(X, dtype=DTYPE)

        # Determine output settings
        n_samples, self.n_features_ = X.shape
        is_classification = isinstance(self, ClassifierMixin)

        y = np.atleast_1d(y)
        expanded_class_weight = None

        if y.ndim == 1:
            # reshape is necessary to preserve the data contiguity,
            # unlike y[:, np.newaxis]
            y = np.reshape(y, (-1, 1))

        self.n_outputs_ = y.shape[1]

        if is_classification:
            y = np.copy(y)

            self.classes_ = []
            self.n_classes_ = []

            if self.class_weight is not None:
                y_original = np.copy(y)

            for k in range(self.n_outputs_):
                classes_k, y[:, k] = np.unique(y[:, k], return_inverse=True)
                self.classes_.append(classes_k)
                self.n_classes_.append(classes_k.shape[0])

            if self.class_weight is not None:
                expanded_class_weight = compute_sample_weight(
                    self.class_weight, y_original)

        else:
            self.classes_ = [None] * self.n_outputs_
            self.n_classes_ = [1] * self.n_outputs_

        self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)

        if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
            y = np.ascontiguousarray(y, dtype=DOUBLE)

        # Check parameters
        max_depth = ((2 ** 31) - 1 if self.max_depth is None
                     else self.max_depth)
        max_leaf_nodes = (-1 if self.max_leaf_nodes is None
                          else self.max_leaf_nodes)

        if isinstance(self.max_features, string_types):
            if self.max_features == "auto":
                if is_classification:
                    max_features = max(1, int(np.sqrt(self.n_features_)))
                else:
                    max_features = self.n_features_
            elif self.max_features == "sqrt":
                max_features = max(1, int(np.sqrt(self.n_features_)))
            elif self.max_features == "log2":
                max_features = max(1, int(np.log2(self.n_features_)))
            else:
                raise ValueError(
                    'Invalid value for max_features. Allowed string '
                    'values are "auto", "sqrt" or "log2".')

        elif self.max_features is None:
            max_features = self.n_features_
        elif isinstance(self.max_features, (numbers.Integral, np.integer)):
            max_features = self.max_features
        else:  # float
            if self.max_features > 0.0:
                max_features = max(1, int(self.max_features * self.n_features_))
            else:
                max_features = 0

        self.max_features_ = max_features

        if len(y) != n_samples:
            raise ValueError("Number of labels=%d does not match "
                             "number of samples=%d" % (len(y), n_samples))
        if self.min_samples_split <= 0:
            raise ValueError("min_samples_split must be greater than zero.")
        if self.min_samples_leaf <= 0:
            raise ValueError("min_samples_leaf must be greater than zero.")
        if not 0 <= self.min_weight_fraction_leaf <= 0.5:
            raise ValueError("min_weight_fraction_leaf must be in [0, 0.5]")
        if max_depth <= 0:
            raise ValueError("max_depth must be greater than zero.")
        if not (0 < max_features <= self.n_features_):
            raise ValueError("max_features must be in (0, n_features]")
        if not isinstance(max_leaf_nodes, (numbers.Integral, np.integer)):
            raise ValueError("max_leaf_nodes must be integral number but was "
                             "%r" % max_leaf_nodes)
        if -1 < max_leaf_nodes < 2:
            raise ValueError(("max_leaf_nodes {0} must be either smaller than "
                              "0 or larger than 1").format(max_leaf_nodes))

        if sample_weight is not None:
            if (getattr(sample_weight, "dtype", None) != DOUBLE or
                    not sample_weight.flags.contiguous):
                sample_weight = np.ascontiguousarray(
                    sample_weight, dtype=DOUBLE)
            if len(sample_weight.shape) > 1:
                raise ValueError("Sample weights array has more "
                                 "than one dimension: %d" %
                                 len(sample_weight.shape))
            if len(sample_weight) != n_samples:
                raise ValueError("Number of weights=%d does not match "
                                 "number of samples=%d" %
                                 (len(sample_weight), n_samples))

        if expanded_class_weight is not None:
            if sample_weight is not None:
                sample_weight = sample_weight * expanded_class_weight
            else:
                sample_weight = expanded_class_weight

        # Set min_weight_leaf from min_weight_fraction_leaf
        if self.min_weight_fraction_leaf != 0. and sample_weight is not None:
            min_weight_leaf = (self.min_weight_fraction_leaf *
                               np.sum(sample_weight))
        else:
            min_weight_leaf = 0.

        # Set min_samples_split sensibly
        min_samples_split = max(self.min_samples_split,
                                2 * self.min_samples_leaf)

        # Build tree
        criterion = self.criterion
        if not isinstance(criterion, Criterion):
            if is_classification:
                criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
                                                         self.n_classes_)
            else:
                criterion = CRITERIA_REG[self.criterion](self.n_outputs_)

        SPLITTERS = DENSE_SPLITTERS

        splitter = self.splitter
        if not isinstance(self.splitter, Splitter):
            splitter = SPLITTERS[self.splitter](criterion,
                                                self.max_features_,
                                                self.min_samples_leaf,
                                                min_weight_leaf,
                                                random_state)

        self.tree_ = Tree(self.n_features_, self.n_classes_, self.n_outputs_)

        # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
        if max_leaf_nodes < 0:
            builder = DepthFirstTreeBuilder(splitter, min_samples_split,
                                            self.min_samples_leaf,
                                            min_weight_leaf,
                                            max_depth)
        else:
            builder = BestFirstTreeBuilder(splitter, min_samples_split,
                                           self.min_samples_leaf,
                                           min_weight_leaf,
                                           max_depth,
                                           max_leaf_nodes)

        builder.build(self.tree_, X, y, sample_weight)

        if self.n_outputs_ == 1:
            self.n_classes_ = self.n_classes_[0]
            self.classes_ = self.classes_[0]

        return self

    def predict(self, X, check_input=True):
        """Predict class or regression value for X.

        For a classification model, the predicted class for each sample in X is
        returned. For a regression model, the predicted value based on X is
        returned.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.

        check_input : boolean, (default=True)
            Allows bypassing several input checks.
            Don't use this parameter unless you know what you are doing.

        Returns
        -------
        y : array of shape = [n_samples] or [n_samples, n_outputs]
            The predicted classes, or the predicted values.
        """
        if check_input:
            X = check_array(X, dtype=DTYPE)

        n_samples, n_features = X.shape

        if self.tree_ is None:
            raise NotFittedError("Tree not initialized. Perform a fit first.")

        if self.n_features_ != n_features:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features is %s and "
                             "input n_features is %s"
                             % (self.n_features_, n_features))

        proba = self.tree_.predict(X)

        # Classification
        if isinstance(self, ClassifierMixin):
            if self.n_outputs_ == 1:
                return self.classes_.take(np.argmax(proba, axis=1), axis=0)

            else:
                predictions = np.zeros((n_samples, self.n_outputs_))

                for k in range(self.n_outputs_):
                    predictions[:, k] = self.classes_[k].take(
                        np.argmax(proba[:, k], axis=1),
                        axis=0)

                return predictions

        # Regression
        else:
            if self.n_outputs_ == 1:
                return proba[:, 0]

            else:
                return proba[:, :, 0]

    @property
    def feature_importances_(self):
        """Return the feature importances.

        The importance of a feature is computed as the (normalized) total
        reduction of the criterion brought by that feature.
        It is also known as the Gini importance.

        Returns
        -------
        feature_importances_ : array, shape = [n_features]
        """
        if self.tree_ is None:
            raise NotFittedError("Estimator not fitted, call `fit` before"
                                 " `feature_importances_`.")

        return self.tree_.compute_feature_importances()


# =============================================================================
# Public estimators
# =============================================================================

class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin):
    """A decision tree classifier.

    Parameters
    ----------
    criterion : string, optional (default="gini")
        The function to measure the quality of a split. Supported criteria are
        "gini" for the Gini impurity and "entropy" for the information gain.

    splitter : string, optional (default="best")
        The strategy used to choose the split at each node. Supported
        strategies are "best" to choose the best split and "random" to choose
        the best random split.

    max_features : int, float, string or None, optional (default=None)
        The number of features to consider when looking for the best split:

        - If int, then consider `max_features` features at each split.
        - If float, then `max_features` is a percentage and
          `int(max_features * n_features)` features are considered at each
          split.
        - If "auto", then `max_features=sqrt(n_features)`.
        - If "sqrt", then `max_features=sqrt(n_features)`.
        - If "log2", then `max_features=log2(n_features)`.
        - If None, then `max_features=n_features`.

        Note: the search for a split does not stop until at least one
        valid partition of the node samples is found, even if it requires to
        effectively inspect more than ``max_features`` features.

    max_depth : int or None, optional (default=None)
        The maximum depth of the tree. If None, then nodes are expanded until
        all leaves are pure or until all leaves contain less than
        min_samples_split samples.
        Ignored if ``max_leaf_nodes`` is not None.

    min_samples_split : int, optional (default=2)
        The minimum number of samples required to split an internal node.

    min_samples_leaf : int, optional (default=1)
        The minimum number of samples required to be at a leaf node.

    min_weight_fraction_leaf : float, optional (default=0.)
        The minimum weighted fraction of the input samples required to be at a
        leaf node.

    max_leaf_nodes : int or None, optional (default=None)
        Grow a tree with ``max_leaf_nodes`` in best-first fashion.
        Best nodes are defined as relative reduction in impurity.
        If None then unlimited number of leaf nodes.
        If not None then ``max_depth`` will be ignored.

    class_weight : dict, list of dicts, "auto" or None, optional (default=None)
        Weights associated with classes in the form ``{class_label: weight}``.
        If not given, all classes are supposed to have weight one. For
        multi-output problems, a list of dicts can be provided in the same
        order as the columns of y.

        The "auto" mode uses the values of y to automatically adjust
        weights inversely proportional to class frequencies in the input data.

        For multi-output, the weights of each column of y will be multiplied.

        Note that these weights will be multiplied with sample_weight (passed
        through the fit method) if sample_weight is specified.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Attributes
    ----------
    tree_ : Tree object
        The underlying Tree object.

    max_features_ : int,
        The inferred value of max_features.

    classes_ : array of shape = [n_classes] or a list of such arrays
        The classes labels (single output problem),
        or a list of arrays of class labels (multi-output problem).

    n_classes_ : int or list
        The number of classes (for single output problems),
        or a list containing the number of classes for each
        output (for multi-output problems).

    feature_importances_ : array of shape = [n_features]
        The feature importances. The higher, the more important the
        feature. The importance of a feature is computed as the (normalized)
        total reduction of the criterion brought by that feature. It is also
        known as the Gini importance [4]_.

    See also
    --------
    DecisionTreeRegressor

    References
    ----------

    .. [1] http://en.wikipedia.org/wiki/Decision_tree_learning

    .. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, "Classification
           and Regression Trees", Wadsworth, Belmont, CA, 1984.

    .. [3] T. Hastie, R. Tibshirani and J. Friedman. "Elements of Statistical
           Learning", Springer, 2009.

    .. [4] L. Breiman, and A. Cutler, "Random Forests",
           http://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.cross_validation import cross_val_score
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> clf = DecisionTreeClassifier(random_state=0)
    >>> iris = load_iris()
    >>> cross_val_score(clf, iris.data, iris.target, cv=10)
    ...                             # doctest: +SKIP
    ...
    array([ 1.     ,  0.93...,  0.86...,  0.93...,  0.93...,
            0.93...,  0.93...,  1.     ,  0.93...,  1.     ])
    """
    def __init__(self,
                 criterion="gini",
                 splitter="best",
                 max_depth=None,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 min_weight_fraction_leaf=0.,
                 max_features=None,
                 random_state=None,
                 max_leaf_nodes=None,
                 class_weight=None):
        super(DecisionTreeClassifier, self).__init__(
            criterion=criterion,
            splitter=splitter,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            class_weight=class_weight,
            random_state=random_state)

    def predict_proba(self, X, check_input=True):
        """Predict class probabilities of the input samples X.

        The predicted class probability is the fraction of samples of the same
        class in a leaf.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.

        check_input : boolean, (default=True)
            Allows bypassing several input checks.
            Don't use this parameter unless you know what you are doing.

        Returns
        -------
        p : array of shape = [n_samples, n_classes], or a list of n_outputs
            such arrays if n_outputs > 1.
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute `classes_`.
        """
        check_is_fitted(self, 'n_outputs_')
        if check_input:
            X = check_array(X, dtype=DTYPE)

        n_samples, n_features = X.shape

        if self.tree_ is None:
            raise NotFittedError("Tree not initialized. Perform a fit first.")

        if self.n_features_ != n_features:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features is %s and "
                             "input n_features is %s"
                             % (self.n_features_, n_features))

        proba = self.tree_.predict(X)

        if self.n_outputs_ == 1:
            proba = proba[:, :self.n_classes_]
            normalizer = proba.sum(axis=1)[:, np.newaxis]
            normalizer[normalizer == 0.0] = 1.0
            proba /= normalizer

            return proba

        else:
            all_proba = []

            for k in range(self.n_outputs_):
                proba_k = proba[:, k, :self.n_classes_[k]]
                normalizer = proba_k.sum(axis=1)[:, np.newaxis]
                normalizer[normalizer == 0.0] = 1.0
                proba_k /= normalizer
                all_proba.append(proba_k)

            return all_proba

    def predict_log_proba(self, X):
        """Predict class log-probabilities of the input samples X.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.

        Returns
        -------
        p : array of shape = [n_samples, n_classes], or a list of n_outputs
            such arrays if n_outputs > 1.
            The class log-probabilities of the input samples. The order of the
            classes corresponds to that in the attribute `classes_`.
        """
        proba = self.predict_proba(X)

        if self.n_outputs_ == 1:
            return np.log(proba)

        else:
            for k in range(self.n_outputs_):
                proba[k] = np.log(proba[k])

            return proba


class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin):
    """A decision tree regressor.

    Parameters
    ----------
    criterion : string, optional (default="mse")
        The function to measure the quality of a split. The only supported
        criterion is "mse" for the mean squared error.

    splitter : string, optional (default="best")
        The strategy used to choose the split at each node. Supported
        strategies are "best" to choose the best split and "random" to choose
        the best random split.

    max_features : int, float, string or None, optional (default=None)
        The number of features to consider when looking for the best split:

        - If int, then consider `max_features` features at each split.
        - If float, then `max_features` is a percentage and
          `int(max_features * n_features)` features are considered at each
          split.
        - If "auto", then `max_features=n_features`.
        - If "sqrt", then `max_features=sqrt(n_features)`.
        - If "log2", then `max_features=log2(n_features)`.
        - If None, then `max_features=n_features`.

        Note: the search for a split does not stop until at least one
        valid partition of the node samples is found, even if it requires to
        effectively inspect more than ``max_features`` features.

    max_depth : int or None, optional (default=None)
        The maximum depth of the tree. If None, then nodes are expanded until
        all leaves are pure or until all leaves contain less than
        min_samples_split samples.
        Ignored if ``max_leaf_nodes`` is not None.

    min_samples_split : int, optional (default=2)
        The minimum number of samples required to split an internal node.

    min_samples_leaf : int, optional (default=1)
        The minimum number of samples required to be at a leaf node.

    min_weight_fraction_leaf : float, optional (default=0.)
        The minimum weighted fraction of the input samples required to be at a
        leaf node.

    max_leaf_nodes : int or None, optional (default=None)
        Grow a tree with ``max_leaf_nodes`` in best-first fashion.
        Best nodes are defined as relative reduction in impurity.
        If None then unlimited number of leaf nodes.
        If not None then ``max_depth`` will be ignored.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Attributes
    ----------
    tree_ : Tree object
        The underlying Tree object.

    max_features_ : int,
        The inferred value of max_features.

    feature_importances_ : array of shape = [n_features]
        The feature importances.
        The higher, the more important the feature.
        The importance of a feature is computed as the
        (normalized) total reduction of the criterion brought
        by that feature. It is also known as the Gini importance [4]_.

    See also
    --------
    DecisionTreeClassifier

    References
    ----------

    .. [1] http://en.wikipedia.org/wiki/Decision_tree_learning

    .. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, "Classification
           and Regression Trees", Wadsworth, Belmont, CA, 1984.

    .. [3] T. Hastie, R. Tibshirani and J. Friedman. "Elements of Statistical
           Learning", Springer, 2009.

    .. [4] L. Breiman, and A. Cutler, "Random Forests",
           http://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm

    Examples
    --------
    >>> from sklearn.datasets import load_boston
    >>> from sklearn.cross_validation import cross_val_score
    >>> from sklearn.tree import DecisionTreeRegressor
    >>> boston = load_boston()
    >>> regressor = DecisionTreeRegressor(random_state=0)
    >>> cross_val_score(regressor, boston.data, boston.target, cv=10)
    ...                    # doctest: +SKIP
    ...
    array([ 0.61...,  0.57..., -0.34...,  0.41...,  0.75...,
            0.07...,  0.29...,  0.33..., -1.42..., -1.77...])
    """
    def __init__(self,
                 criterion="mse",
                 splitter="best",
                 max_depth=None,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 min_weight_fraction_leaf=0.,
                 max_features=None,
                 random_state=None,
                 max_leaf_nodes=None):
        super(DecisionTreeRegressor, self).__init__(
            criterion=criterion,
            splitter=splitter,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            random_state=random_state)


class ExtraTreeClassifier(DecisionTreeClassifier):
    """An extremely randomized tree classifier.

    Extra-trees differ from classic decision trees in the way they are built.
    When looking for the best split to separate the samples of a node into two
    groups, random splits are drawn for each of the `max_features` randomly
    selected features and the best split among those is chosen. When
    `max_features` is set to 1, this amounts to building a totally random
    decision tree.

    Warning: Extra-trees should only be used within ensemble methods.

    See also
    --------
    ExtraTreeRegressor, ExtraTreesClassifier, ExtraTreesRegressor

    References
    ----------

    .. [1] P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized trees",
           Machine Learning, 63(1), 3-42, 2006.
    """
    def __init__(self,
                 criterion="gini",
                 splitter="random",
                 max_depth=None,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 min_weight_fraction_leaf=0.,
                 max_features="auto",
                 random_state=None,
                 max_leaf_nodes=None,
                 class_weight=None):
        super(ExtraTreeClassifier, self).__init__(
            criterion=criterion,
            splitter=splitter,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            class_weight=class_weight,
            random_state=random_state)


class ExtraTreeRegressor(DecisionTreeRegressor):
    """An extremely randomized tree regressor.

    Extra-trees differ from classic decision trees in the way they are built.
    When looking for the best split to separate the samples of a node into two
    groups, random splits are drawn for each of the `max_features` randomly
    selected features and the best split among those is chosen. When
    `max_features` is set to 1, this amounts to building a totally random
    decision tree.

    Warning: Extra-trees should only be used within ensemble methods.

    See also
    --------
    ExtraTreeClassifier, ExtraTreesClassifier, ExtraTreesRegressor

    References
    ----------

    .. [1] P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized trees",
           Machine Learning, 63(1), 3-42, 2006.
    """
    def __init__(self,
                 criterion="mse",
                 splitter="random",
                 max_depth=None,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 min_weight_fraction_leaf=0.,
                 max_features="auto",
                 random_state=None,
                 max_leaf_nodes=None):
        super(ExtraTreeRegressor, self).__init__(
            criterion=criterion,
            splitter=splitter,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            random_state=random_state)
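As a quick end-to-end check of the vendored module, a minimal sketch (it assumes the compiled `_tree` extension is importable and uses the `isaac.external.sklearn` path implied by this commit):

    import numpy as np
    from isaac.external.sklearn.tree import DecisionTreeClassifier

    # A tiny XOR-style dataset; an unpruned tree fits it exactly.
    X = np.array([[0., 0.], [1., 1.], [0., 1.], [1., 0.]], dtype=np.float32)
    y = np.array([0, 1, 1, 0])

    clf = DecisionTreeClassifier(random_state=0).fit(X, y)
    print(clf.predict(X))        # recovers the training labels
    print(clf.predict_proba(X))  # per-class leaf frequencies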
python/isaac/external/sklearn/utils.py (new vendored file, 911 lines)
@@ -0,0 +1,911 @@
import sys
import inspect
import warnings
import numbers

import numpy as np

################################ six ########################################
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3

if PY3:
    string_types = str
    _iteritems = "items"

else:
    string_types = basestring
    _iteritems = "iteritems"


def iteritems(d, **kw):
    """Return an iterator over the (key, value) pairs of a dictionary."""
    return iter(getattr(d, _iteritems)(**kw))

################################ utils ########################################

# Parse the numpy version string into a comparable tuple of ints.
np_version = tuple(int(x) for x in np.__version__.split('.')[:3]
                   if x.isdigit())

if np_version < (1, 6, 2):
    # Allow bincount to accept empty arrays
    # https://github.com/numpy/numpy/commit/40f0844846a9d7665616b142407a3d74cb65a040
    def bincount(x, weights=None, minlength=None):
        if len(x) > 0:
            return np.bincount(x, weights, minlength)
        else:
            if minlength is None:
                minlength = 0
            minlength = np.asscalar(np.asarray(minlength, dtype=np.intp))
            return np.zeros(minlength, dtype=np.intp)

else:
    from numpy import bincount

class DataConversionWarning(UserWarning):
    """A warning on implicit data conversions happening in the code"""
    pass


class NotFittedError(ValueError, AttributeError):
    """Exception class to raise if estimator is used before fitting

    This class inherits from both ValueError and AttributeError to help with
    exception handling and backward compatibility.
    """


def check_is_fitted(estimator, attributes, msg=None, all_or_any=all):
    """Perform is_fitted validation for estimator.

    Checks if the estimator is fitted by verifying the presence of
    "all_or_any" of the passed attributes and raises a NotFittedError with the
    given message.

    Parameters
    ----------
    estimator : estimator instance.
        estimator instance for which the check is performed.

    attributes : attribute name(s) given as string or a list/tuple of strings
        Eg. : ["coef_", "estimator_", ...], "coef_"

    msg : string
        The default error message is, "This %(name)s instance is not fitted
        yet. Call 'fit' with appropriate arguments before using this method."

        For custom messages if "%(name)s" is present in the message string,
        it is substituted for the estimator name.

        Eg. : "Estimator, %(name)s, must be fitted before sparsifying".

    all_or_any : callable, {all, any}, default all
        Specify whether all or any of the given attributes must exist.
    """
    if msg is None:
        msg = ("This %(name)s instance is not fitted yet. Call 'fit' with "
               "appropriate arguments before using this method.")

    if not hasattr(estimator, 'fit'):
        raise TypeError("%s is not an estimator instance." % (estimator))

    if not isinstance(attributes, (list, tuple)):
        attributes = [attributes]

    if not all_or_any([hasattr(estimator, attr) for attr in attributes]):
        raise NotFittedError(msg % {'name': type(estimator).__name__})

def compute_sample_weight(class_weight, y, indices=None):
    """Estimate sample weights by class for unbalanced datasets.

    Parameters
    ----------
    class_weight : dict, list of dicts, "auto", or None, optional
        Weights associated with classes in the form ``{class_label: weight}``.
        If not given, all classes are supposed to have weight one. For
        multi-output problems, a list of dicts can be provided in the same
        order as the columns of y.

        The "auto" mode uses the values of y to automatically adjust
        weights inversely proportional to class frequencies in the input data.

        For multi-output, the weights of each column of y will be multiplied.

    y : array-like, shape = [n_samples] or [n_samples, n_outputs]
        Array of original class labels per sample.

    indices : array-like, shape (n_subsample,), or None
        Array of indices to be used in a subsample. Can be of length less than
        n_samples in the case of a subsample, or equal to n_samples in the
        case of a bootstrap subsample with repeated indices. If None, the
        sample weight will be calculated over the full sample. Only "auto" is
        supported for class_weight if this is provided.

    Returns
    -------
    sample_weight_vect : ndarray, shape (n_samples,)
        Array with sample weights as applied to the original y
    """

    y = np.atleast_1d(y)
    if y.ndim == 1:
        y = np.reshape(y, (-1, 1))
    n_outputs = y.shape[1]

    if isinstance(class_weight, string_types):
        if class_weight != 'auto':
            raise ValueError('The only valid preset for class_weight is '
                             '"auto". Given "%s".' % class_weight)
    elif (indices is not None and
            not isinstance(class_weight, string_types)):
        raise ValueError('The only valid class_weight for subsampling is '
                         '"auto". Given "%s".' % class_weight)
    elif n_outputs > 1:
        if (not hasattr(class_weight, "__iter__") or
                isinstance(class_weight, dict)):
            raise ValueError("For multi-output, class_weight should be a "
                             "list of dicts, or a valid string.")
        if len(class_weight) != n_outputs:
            raise ValueError("For multi-output, number of elements in "
                             "class_weight should match number of outputs.")

    expanded_class_weight = []
    for k in range(n_outputs):

        y_full = y[:, k]
        classes_full = np.unique(y_full)
        classes_missing = None

        if class_weight == 'auto' or n_outputs == 1:
            class_weight_k = class_weight
        else:
            class_weight_k = class_weight[k]

        if indices is not None:
            # Get class weights for the subsample, covering all classes in
            # case some labels that were present in the original data are
            # missing from the sample.
            y_subsample = y[indices, k]
            classes_subsample = np.unique(y_subsample)

            weight_k = np.choose(np.searchsorted(classes_subsample,
                                                 classes_full),
                                 compute_class_weight(class_weight_k,
                                                      classes_subsample,
                                                      y_subsample),
                                 mode='clip')

            classes_missing = set(classes_full) - set(classes_subsample)
        else:
            weight_k = compute_class_weight(class_weight_k,
                                            classes_full,
                                            y_full)

        weight_k = weight_k[np.searchsorted(classes_full, y_full)]

        if classes_missing:
            # Make missing classes' weight zero
            weight_k[np.in1d(y_full, list(classes_missing))] = 0.

        expanded_class_weight.append(weight_k)

    expanded_class_weight = np.prod(expanded_class_weight,
                                    axis=0,
                                    dtype=np.float64)

    return expanded_class_weight

def _assert_all_finite(X):
    """Like assert_all_finite, but only for ndarray."""
    X = np.asanyarray(X)
    # First try an O(n) time, O(1) space solution for the common case that
    # everything is finite; fall back to O(n) space np.isfinite to prevent
    # false positives from overflow in sum method.
    if (X.dtype.char in np.typecodes['AllFloat'] and not np.isfinite(X.sum())
            and not np.isfinite(X).all()):
        raise ValueError("Input contains NaN, infinity"
                         " or a value too large for %r." % X.dtype)

def check_array(array, accept_sparse=None, dtype="numeric", order=None,
                copy=False, force_all_finite=True, ensure_2d=True,
                allow_nd=False, ensure_min_samples=1, ensure_min_features=1):
    """Input validation on an array, list, sparse matrix or similar.

    By default, the input is converted to an at least 2d numpy array.
    If the dtype of the array is object, attempt converting to float,
    raising on failure.

    Parameters
    ----------
    array : object
        Input object to check / convert.

    accept_sparse : string, list of string or None (default=None)
        String[s] representing allowed sparse matrix formats, such as 'csc',
        'csr', etc. None means that sparse matrix input will raise an error.
        If the input is sparse but not in the allowed format, it will be
        converted to the first listed format.

    dtype : string, type or None (default="numeric")
        Data type of result. If None, the dtype of the input is preserved.
        If "numeric", dtype is preserved unless array.dtype is object.

    order : 'F', 'C' or None (default=None)
        Whether an array will be forced to be fortran or c-style.

    copy : boolean (default=False)
        Whether a forced copy will be triggered. If copy=False, a copy might
        be triggered by a conversion.

    force_all_finite : boolean (default=True)
        Whether to raise an error on np.inf and np.nan in X.

    ensure_2d : boolean (default=True)
        Whether to make X at least 2d.

    allow_nd : boolean (default=False)
        Whether to allow X.ndim > 2.

    ensure_min_samples : int (default=1)
        Make sure that the array has a minimum number of samples in its first
        axis (rows for a 2D array). Setting to 0 disables this check.

    ensure_min_features : int (default=1)
        Make sure that the 2D array has some minimum number of features
        (columns). The default value of 1 rejects empty datasets.
        This check is only enforced when the input data has effectively 2
        dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0
        disables this check.

    Returns
    -------
    X_converted : object
        The converted and validated X.
    """
    if isinstance(accept_sparse, str):
        accept_sparse = [accept_sparse]

    # store whether originally we wanted numeric dtype
    dtype_numeric = dtype == "numeric"

    if ensure_2d:
        array = np.atleast_2d(array)
    if dtype_numeric:
        if hasattr(array, "dtype") and getattr(array.dtype, "kind", None) == "O":
            # if input is object, convert to float.
            dtype = np.float64
        else:
            dtype = None
    array = np.array(array, dtype=dtype, order=order, copy=copy)
    # make sure we actually converted to numeric:
    if dtype_numeric and array.dtype.kind == "O":
        array = array.astype(np.float64)
    if not allow_nd and array.ndim >= 3:
        raise ValueError("Found array with dim %d. Expected <= 2" %
                         array.ndim)
    if force_all_finite:
        _assert_all_finite(array)

    shape_repr = _shape_repr(array.shape)
    if ensure_min_samples > 0:
        n_samples = _num_samples(array)
        if n_samples < ensure_min_samples:
            raise ValueError("Found array with %d sample(s) (shape=%s) while a"
                             " minimum of %d is required."
                             % (n_samples, shape_repr, ensure_min_samples))

    if ensure_min_features > 0 and array.ndim == 2:
        n_features = array.shape[1]
        if n_features < ensure_min_features:
            raise ValueError("Found array with %d feature(s) (shape=%s) while"
                             " a minimum of %d is required."
                             % (n_features, shape_repr, ensure_min_features))
    return array

def check_random_state(seed):
    """Turn seed into a np.random.RandomState instance

    If seed is None, return the RandomState singleton used by np.random.
    If seed is an int, return a new RandomState instance seeded with seed.
    If seed is already a RandomState instance, return it.
    Otherwise raise ValueError.
    """
    if seed is None or seed is np.random:
        return np.random.mtrand._rand
    if isinstance(seed, (numbers.Integral, np.integer)):
        return np.random.RandomState(seed)
    if isinstance(seed, np.random.RandomState):
        return seed
    raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
                     ' instance' % seed)

def _shape_repr(shape):
    """Return a platform independent representation of an array shape

    Under Python 2, the `long` type introduces an 'L' suffix when using the
    default %r format for tuples of integers (typically used to store the shape
    of an array).

    Under Windows 64 bit (and Python 2), the `long` type is used by default
    in numpy shapes even when the integer dimensions are well below 32 bit.
    The platform specific type causes string messages or doctests to change
    from one platform to another which is not desirable.

    Under Python 3, there is no more `long` type so the `L` suffix is never
    introduced in string representation.

    >>> _shape_repr((1, 2))
    '(1, 2)'
    >>> one = 2 ** 64 / 2 ** 64  # force an upcast to `long` under Python 2
    >>> _shape_repr((one, 2 * one))
    '(1, 2)'
    >>> _shape_repr((1,))
    '(1,)'
    >>> _shape_repr(())
    '()'
    """
    if len(shape) == 0:
        return "()"
    joined = ", ".join("%d" % e for e in shape)
    if len(shape) == 1:
        # special notation for singleton tuples
        joined += ','
    return "(%s)" % joined

def _num_samples(x):
    """Return number of samples in array-like x."""
    if hasattr(x, 'fit'):
        # Don't get num_samples from an ensemble's length!
        raise TypeError('Expected sequence or array-like, got '
                        'estimator %s' % x)
    if not hasattr(x, '__len__') and not hasattr(x, 'shape'):
        if hasattr(x, '__array__'):
            x = np.asarray(x)
        else:
            raise TypeError("Expected sequence or array-like, got %s" %
                            type(x))
    if hasattr(x, 'shape'):
        if len(x.shape) == 0:
            raise TypeError("Singleton array %r cannot be considered"
                            " a valid collection." % x)
        return x.shape[0]
    else:
        return len(x)

################################ metrics ########################################
def _weighted_sum(sample_score, sample_weight, normalize=False):
    if normalize:
        return np.average(sample_score, weights=sample_weight)
    elif sample_weight is not None:
        return np.dot(sample_score, sample_weight)
    else:
        return sample_score.sum()


def accuracy_score(y_true, y_pred, normalize=True, sample_weight=None):
    """Accuracy classification score.

    In multilabel classification, this function computes subset accuracy:
    the set of labels predicted for a sample must *exactly* match the
    corresponding set of labels in y_true.

    Parameters
    ----------
    y_true : 1d array-like, or label indicator array / sparse matrix
        Ground truth (correct) labels.

    y_pred : 1d array-like, or label indicator array / sparse matrix
        Predicted labels, as returned by a classifier.

    normalize : bool, optional (default=True)
        If ``False``, return the number of correctly classified samples.
        Otherwise, return the fraction of correctly classified samples.

    sample_weight : array-like of shape = [n_samples], optional
        Sample weights.

    Returns
    -------
    score : float
        If ``normalize == True``, return the fraction of correctly classified
        samples (float), else return the number of correctly classified
        samples (int).

        The best performance is 1 with ``normalize == True`` and the number
        of samples with ``normalize == False``.

    See also
    --------
    jaccard_similarity_score, hamming_loss, zero_one_loss

    Notes
    -----
    In binary and multiclass classification, this function is equal
    to the ``jaccard_similarity_score`` function.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import accuracy_score
    >>> y_pred = [0, 2, 1, 3]
    >>> y_true = [0, 1, 2, 3]
    >>> accuracy_score(y_true, y_pred)
    0.5
    >>> accuracy_score(y_true, y_pred, normalize=False)
    2

    In the multilabel case with binary label indicators:

    >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))
    0.5
    """

    # Compute accuracy for each possible representation
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    if y_type.startswith('multilabel'):
        differing_labels = count_nonzero(y_true - y_pred, axis=1)
        score = differing_labels == 0
    else:
        score = y_true == y_pred

    return _weighted_sum(score, sample_weight, normalize)

def r2_score(y_true, y_pred, sample_weight=None):
    """R^2 (coefficient of determination) regression score function.

    Best possible score is 1.0, lower values are worse.

    Parameters
    ----------
    y_true : array-like of shape = [n_samples] or [n_samples, n_outputs]
        Ground truth (correct) target values.

    y_pred : array-like of shape = [n_samples] or [n_samples, n_outputs]
        Estimated target values.

    sample_weight : array-like of shape = [n_samples], optional
        Sample weights.

    Returns
    -------
    z : float
        The R^2 score.

    Notes
    -----
    This is not a symmetric function.

    Unlike most other scores, R^2 score may be negative (it need not actually
    be the square of a quantity R).

    References
    ----------
    .. [1] `Wikipedia entry on the Coefficient of determination
            <http://en.wikipedia.org/wiki/Coefficient_of_determination>`_

    Examples
    --------
    >>> from sklearn.metrics import r2_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> r2_score(y_true, y_pred)  # doctest: +ELLIPSIS
    0.948...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> r2_score(y_true, y_pred)  # doctest: +ELLIPSIS
    0.938...

    """
    y_type, y_true, y_pred = _check_reg_targets(y_true, y_pred)

    if sample_weight is not None:
        sample_weight = column_or_1d(sample_weight)
        weight = sample_weight[:, np.newaxis]
    else:
        weight = 1.

    numerator = (weight * (y_true - y_pred) ** 2).sum(dtype=np.float64)
    denominator = (weight * (y_true - np.average(
        y_true, axis=0, weights=sample_weight)) ** 2).sum(dtype=np.float64)

    if denominator == 0.0:
        if numerator == 0.0:
            return 1.0
        else:
            # arbitrarily set to zero to avoid -inf scores; having a constant
            # y_true is not interesting for scoring a regression anyway
            return 0.0

    return 1 - numerator / denominator


################################ base #########################################
###############################################################################

class BaseEstimator(object):
    """Base class for all estimators in scikit-learn

    Notes
    -----
    All estimators should specify all the parameters that can be set
    at the class level in their ``__init__`` as explicit keyword
    arguments (no ``*args`` or ``**kwargs``).
    """

    @classmethod
    def _get_param_names(cls):
        """Get parameter names for the estimator"""
        # fetch the constructor or the original constructor before
        # deprecation wrapping if any
        init = getattr(cls.__init__, 'deprecated_original', cls.__init__)
        if init is object.__init__:
            # No explicit constructor to introspect
            return []

        # introspect the constructor arguments to find the model parameters
        # to represent
        args, varargs, kw, default = inspect.getargspec(init)
        if varargs is not None:
            raise RuntimeError("scikit-learn estimators should always "
                               "specify their parameters in the signature"
                               " of their __init__ (no varargs)."
                               " %s doesn't follow this convention."
                               % (cls, ))
        # Remove 'self'
        # XXX: This is going to fail if the init is a staticmethod, but
        # who would do this?
        args.pop(0)
        args.sort()
        return args

    def get_params(self, deep=True):
        """Get parameters for this estimator.

        Parameters
        ----------
        deep : boolean, optional
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : mapping of string to any
            Parameter names mapped to their values.
        """
        out = dict()
        for key in self._get_param_names():
            # We need deprecation warnings to always be on in order to
            # catch deprecated param values.
            # This is set in utils/__init__.py but it gets overwritten
            # when running under python3 somehow.
            warnings.simplefilter("always", DeprecationWarning)
            try:
                with warnings.catch_warnings(record=True) as w:
                    value = getattr(self, key, None)
                if len(w) and w[0].category == DeprecationWarning:
                    # if the parameter is deprecated, don't show it
                    continue
            finally:
                warnings.filters.pop(0)

            # XXX: should we rather test if instance of estimator?
            if deep and hasattr(value, 'get_params'):
                deep_items = value.get_params().items()
                out.update((key + '__' + k, val) for k, val in deep_items)
            out[key] = value
        return out

    def set_params(self, **params):
        """Set the parameters of this estimator.

        The method works on simple estimators as well as on nested objects
        (such as pipelines). The former have parameters of the form
        ``<component>__<parameter>`` so that it's possible to update each
        component of a nested object.

        Returns
        -------
        self
        """
        if not params:
            # Simple optimisation to gain speed (inspect is slow)
            return self
        valid_params = self.get_params(deep=True)
        for key, value in iteritems(params):
            split = key.split('__', 1)
            if len(split) > 1:
                # nested objects case
                name, sub_name = split
                if name not in valid_params:
                    raise ValueError('Invalid parameter %s for estimator %s' %
                                     (name, self))
                sub_object = valid_params[name]
                sub_object.set_params(**{sub_name: value})
            else:
                # simple objects case
                if key not in valid_params:
                    raise ValueError('Invalid parameter %s for estimator %s'
                                     % (key, self.__class__.__name__))
                setattr(self, key, value)
        return self

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, _pprint(self.get_params(deep=False),
                                               offset=len(class_name),),)


class MetaEstimatorMixin(object):
    """Mixin class for all meta estimators in scikit-learn."""
    # this is just a tag for the moment


class ClassifierMixin(object):
    """Mixin class for all classifiers in scikit-learn."""

    def score(self, X, y, sample_weight=None):
        """Returns the mean accuracy on the given test data and labels.

        In multi-label classification, this is the subset accuracy
        which is a harsh metric since you require for each sample that
        each label set be correctly predicted.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Test samples.

        y : array-like, shape = (n_samples) or (n_samples, n_outputs)
            True labels for X.

        sample_weight : array-like, shape = [n_samples], optional
            Sample weights.

        Returns
        -------
        score : float
            Mean accuracy of self.predict(X) wrt. y.
        """
        return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
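
# --- Illustrative sketch (not part of the vendored sklearn code) ------------
# For single-output labels, the accuracy used by ClassifierMixin.score reduces
# to a (weighted) mean of exact matches. The hypothetical helper below shows
# that reduction with plain NumPy; it is not the accuracy_score implementation.
def _example_mean_accuracy(y_true, y_pred, sample_weight=None):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # 1.0 for every sample predicted exactly right, 0.0 otherwise.
    correct = (y_true == y_pred).astype(np.float64)
    return np.average(correct, weights=sample_weight)
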

class RegressorMixin(object):
    """Mixin class for all regression estimators in scikit-learn."""

    def score(self, X, y, sample_weight=None):
        """Returns the coefficient of determination R^2 of the prediction.

        The coefficient R^2 is defined as (1 - u/v), where u is the residual
        sum of squares ((y_true - y_pred) ** 2).sum() and v is the total
        sum of squares ((y_true - y_true.mean()) ** 2).sum().
        The best possible score is 1.0; lower values are worse, and the score
        can be negative.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Test samples.

        y : array-like, shape = (n_samples) or (n_samples, n_outputs)
            True values for X.

        sample_weight : array-like, shape = [n_samples], optional
            Sample weights.

        Returns
        -------
        score : float
            R^2 of self.predict(X) wrt. y.
        """
        return r2_score(y, self.predict(X), sample_weight=sample_weight)
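
# --- Illustrative sketch (not part of the vendored sklearn code) ------------
# The R^2 definition quoted in RegressorMixin.score can be computed directly
# for the single-output, unweighted case. This hypothetical helper mirrors the
# formula from the docstring; it is not the r2_score implementation.
def _example_r2(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=np.float64)
    y_pred = np.asarray(y_pred, dtype=np.float64)
    u = ((y_true - y_pred) ** 2).sum()           # residual sum of squares
    v = ((y_true - y_true.mean()) ** 2).sum()    # total sum of squares
    return 1.0 - u / v
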

######################### Preprocessing ################################


class OneHotEncoder:
    """Encode categorical integer features using a one-hot aka one-of-K scheme.

    The input to this transformer should be a matrix of integers, denoting
    the values taken on by categorical (discrete) features. The output will be
    a sparse matrix where each column corresponds to one possible value of one
    feature. It is assumed that input features take on values in the range
    [0, n_values).

    This encoding is needed for feeding categorical data to many scikit-learn
    estimators, notably linear models and SVMs with the standard kernels.

    Parameters
    ----------
    n_values : 'auto', int or array of ints
        Number of values per feature.

        - 'auto' : determine value range from training data.
        - int : maximum value for all features.
        - array : maximum value per feature.

    categorical_features : "all" or array of indices or mask
        Specify what features are treated as categorical.

        - 'all' (default): All features are treated as categorical.
        - array of indices: Array of categorical feature indices.
        - mask: Array of length n_features and with dtype=bool.

        Non-categorical features are always stacked to the right of the matrix.

    dtype : number type, default=np.float
        Desired dtype of output.

    sparse : boolean, default=True
        Will return sparse matrix if set True else will return an array.

    handle_unknown : str, 'error' or 'ignore'
        Whether to raise an error or ignore if an unknown categorical feature
        is present during transform.

    Attributes
    ----------
    active_features_ : array
        Indices for active features, meaning values that actually occur
        in the training set. Only available when n_values is ``'auto'``.

    feature_indices_ : array of shape (n_features,)
        Indices to feature ranges.
        Feature ``i`` in the original data is mapped to features
        from ``feature_indices_[i]`` to ``feature_indices_[i+1]``
        (and then potentially masked by `active_features_` afterwards)

    n_values_ : array of shape (n_features,)
        Maximum number of values per feature.

    Examples
    --------
    Given a dataset with three features and four samples, we let the encoder
    find the maximum value per feature and transform the data to a binary
    one-hot encoding.

    >>> from sklearn.preprocessing import OneHotEncoder
    >>> enc = OneHotEncoder()
    >>> enc.fit([[0, 0, 3], [1, 1, 0], [0, 2, 1], \
[1, 0, 2]])  # doctest: +ELLIPSIS
    OneHotEncoder(categorical_features='all', dtype=<... 'float'>,
           handle_unknown='error', n_values='auto', sparse=True)
    >>> enc.n_values_
    array([2, 3, 4])
    >>> enc.feature_indices_
    array([0, 2, 5, 9])
    >>> enc.transform([[0, 1, 1]]).toarray()
    array([[ 1.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.]])

    See also
    --------
    sklearn.feature_extraction.DictVectorizer : performs a one-hot encoding of
        dictionary items (also handles string-valued features).
    sklearn.feature_extraction.FeatureHasher : performs an approximate one-hot
        encoding of dictionary items or strings.
    """
    def __init__(self, n_values="auto", categorical_features="all",
                 dtype=np.float, sparse=True, handle_unknown='error'):
        self.n_values = n_values
        self.categorical_features = categorical_features
        self.dtype = dtype
        self.sparse = sparse
        self.handle_unknown = handle_unknown
    def fit(self, X, y=None):
        """Fit OneHotEncoder to X.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Input array of type int.

        Returns
        -------
        self
        """
        self.fit_transform(X)
        return self
    def _fit_transform(self, X):
        """Assumes X contains only categorical features."""
        X = check_array(X, dtype=np.int)
        if np.any(X < 0):
            raise ValueError("X needs to contain only non-negative integers.")
        n_samples, n_features = X.shape
        if self.n_values == 'auto':
            n_values = np.max(X, axis=0) + 1
        elif isinstance(self.n_values, numbers.Integral):
            if (np.max(X, axis=0) >= self.n_values).any():
                raise ValueError("Feature out of bounds for n_values=%d"
                                 % self.n_values)
            n_values = np.empty(n_features, dtype=np.int)
            n_values.fill(self.n_values)
        else:
            try:
                n_values = np.asarray(self.n_values, dtype=int)
            except (ValueError, TypeError):
                raise TypeError("Wrong type for parameter `n_values`. Expected"
                                " 'auto', int or array of ints, got %r"
                                % type(self.n_values))
            if n_values.ndim < 1 or n_values.shape[0] != X.shape[1]:
                raise ValueError("Shape mismatch: if n_values is an array,"
                                 " it has to be of shape (n_features,).")

        self.n_values_ = n_values
        n_values = np.hstack([[0], n_values])
        indices = np.cumsum(n_values)
        self.feature_indices_ = indices

        column_indices = (X + indices[:-1]).ravel()
        row_indices = np.repeat(np.arange(n_samples, dtype=np.int32),
                                n_features)
        # Original (sparse) construction, kept for reference:
        # data = np.ones(n_samples * n_features)
        # out = sparse.coo_matrix((data, (row_indices, column_indices)),
        #                         shape=(n_samples, indices[-1]),
        #                         dtype=self.dtype).tocsr()
        # This vendored version builds the one-hot matrix densely instead.
        out = np.zeros((n_samples, indices[-1]), dtype=self.dtype)
        out[row_indices, column_indices] = 1

        if self.n_values == 'auto':
            mask = np.array(out.sum(axis=0)).ravel() != 0
            active_features = np.where(mask)[0]
            out = out[:, active_features]
            self.active_features_ = active_features

        # ``out`` is already a dense ndarray here, so return it as-is.
        return out
    def fit_transform(self, X, y=None):
        """Fit OneHotEncoder to X, then transform X.

        Equivalent to self.fit(X).transform(X), but more convenient and more
        efficient. See fit for the parameters, transform for the return value.
        """
        return _transform_selected(X, self._fit_transform,
                                   self.categorical_features, copy=True)
    def _transform(self, X):
        """Assumes X contains only categorical features."""
        X = check_array(X, dtype=np.int)
        if np.any(X < 0):
            raise ValueError("X needs to contain only non-negative integers.")
        n_samples, n_features = X.shape

        indices = self.feature_indices_
        if n_features != indices.shape[0] - 1:
            raise ValueError("X has different shape than during fitting."
                             " Expected %d, got %d."
                             % (indices.shape[0] - 1, n_features))

        # We use only those categorical features of X that are known from fit,
        # i.e. less than n_values_, using mask.
        # This means that if self.handle_unknown is "ignore", the row_indices
        # and col_indices corresponding to the unknown categorical feature are
        # ignored.
        mask = (X < self.n_values_).ravel()
        if np.any(~mask):
            if self.handle_unknown not in ['error', 'ignore']:
                raise ValueError("handle_unknown should be either 'error' or "
                                 "'ignore', got %s" % self.handle_unknown)
            if self.handle_unknown == 'error':
                raise ValueError("unknown categorical feature present %s "
                                 "during transform." % X[~mask])

        column_indices = (X + indices[:-1]).ravel()[mask]
        row_indices = np.repeat(np.arange(n_samples, dtype=np.int32),
                                n_features)[mask]
        data = np.ones(np.sum(mask))
        out = sparse.coo_matrix((data, (row_indices, column_indices)),
                                shape=(n_samples, indices[-1]),
                                dtype=self.dtype).tocsr()
        if self.n_values == 'auto':
            out = out[:, self.active_features_]

        return out if self.sparse else out.toarray()
    def transform(self, X):
        """Transform X using one-hot encoding.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Input array of type int.

        Returns
        -------
        X_out : sparse matrix if sparse=True else a 2-d array, dtype=int
            Transformed input.
        """
        return _transform_selected(X, self._transform,
                                   self.categorical_features, copy=True)
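
# --- Illustrative sketch (not part of the vendored sklearn code) ------------
# The dense construction in OneHotEncoder._fit_transform offsets each
# categorical value by the cumulative number of values of the preceding
# features and scatters ones at those columns. This standalone NumPy sketch
# repeats that arithmetic on a small hypothetical input.
def _example_dense_one_hot():
    X = np.array([[0, 0, 3],
                  [1, 1, 0],
                  [0, 2, 1],
                  [1, 0, 2]])
    n_samples, n_features = X.shape
    n_values = np.max(X, axis=0) + 1                  # [2, 3, 4]
    indices = np.cumsum(np.hstack([[0], n_values]))   # [0, 2, 5, 9]
    column_indices = (X + indices[:-1]).ravel()
    row_indices = np.repeat(np.arange(n_samples), n_features)
    out = np.zeros((n_samples, indices[-1]))
    out[row_indices, column_indices] = 1
    # First row: feature 0 takes value 0, feature 1 takes value 0 and
    # feature 2 takes value 3, so out[0] is [1, 0, 1, 0, 0, 0, 0, 0, 1].
    return out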