Python: added standalone decision tree from sklearn
This commit is contained in:
12
python/isaac/external/__init__.py
vendored
Normal file
12
python/isaac/external/__init__.py
vendored
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
"""
|
||||||
|
This package is a standalone copy of :mod:`sklearn.tree`, which includes decision tree-based models for
|
||||||
|
classification and regression.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Re-export the estimator classes implemented in the sibling ``tree`` module.
from .tree import (
    DecisionTreeClassifier,
    DecisionTreeRegressor,
    ExtraTreeClassifier,
    ExtraTreeRegressor,
)

# Names exposed by ``from <package> import *``.
__all__ = [
    "DecisionTreeClassifier",
    "DecisionTreeRegressor",
    "ExtraTreeClassifier",
    "ExtraTreeRegressor",
]
|
BIN
python/isaac/external/__init__.pyc
vendored
Normal file
BIN
python/isaac/external/__init__.pyc
vendored
Normal file
Binary file not shown.
37211
python/isaac/external/_tree.c
vendored
Normal file
37211
python/isaac/external/_tree.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
274
python/isaac/external/_tree.pxd
vendored
Normal file
274
python/isaac/external/_tree.pxd
vendored
Normal file
@@ -0,0 +1,274 @@
|
|||||||
|
# Authors: Gilles Louppe <g.louppe@gmail.com>
|
||||||
|
# Peter Prettenhofer <peter.prettenhofer@gmail.com>
|
||||||
|
# Brian Holt <bdholt1@gmail.com>
|
||||||
|
# Joel Nothman <joel.nothman@gmail.com>
|
||||||
|
# Arnaud Joly <arnaud.v.joly@gmail.com>
|
||||||
|
#
|
||||||
|
# Licence: BSD 3 clause
|
||||||
|
|
||||||
|
# See _tree.pyx for details.
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
cimport numpy as np
|
||||||
|
|
||||||
|
ctypedef np.npy_float32 DTYPE_t # Type of X
|
||||||
|
ctypedef np.npy_float64 DOUBLE_t # Type of y, sample_weight
|
||||||
|
ctypedef np.npy_intp SIZE_t # Type for indices and counters
|
||||||
|
ctypedef np.npy_int32 INT32_t # Signed 32 bit integer
|
||||||
|
ctypedef np.npy_uint32 UINT32_t # Unsigned 32 bit integer
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Stack data structure
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# A record on the stack for depth-first tree growing
|
||||||
|
cdef struct StackRecord:
    # Plain C record; one field per argument of ``Stack.push``.
    SIZE_t start
    SIZE_t end
    SIZE_t depth
    SIZE_t parent
    bint is_left
    double impurity
    SIZE_t n_constant_features
|
||||||
|
|
||||||
|
cdef class Stack:
    # Declaration of the LIFO container of StackRecord entries.

    cdef SIZE_t capacity        # Number of records the buffer can hold
    cdef SIZE_t top             # Number of records currently stored
    cdef StackRecord* stack_    # Buffer holding the records

    cdef bint is_empty(self) nogil

    # push/pop return an int status code.
    cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth,
                  SIZE_t parent, bint is_left, double impurity,
                  SIZE_t n_constant_features) nogil

    cdef int pop(self, StackRecord* res) nogil
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# PriorityHeap data structure
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# A record on the frontier for best-first tree growing
|
||||||
|
cdef struct PriorityHeapRecord:
    # Plain C record; one field per argument of ``PriorityHeap.push``.
    SIZE_t node_id
    SIZE_t start
    SIZE_t end
    SIZE_t pos
    SIZE_t depth
    bint is_leaf
    double impurity
    double impurity_left
    double impurity_right
    double improvement
|
||||||
|
|
||||||
|
cdef class PriorityHeap:
    # Declaration of the container of PriorityHeapRecord entries.

    cdef SIZE_t capacity                # Number of records the buffer can hold
    cdef SIZE_t heap_ptr                # Position counter into ``heap_``
    cdef PriorityHeapRecord* heap_      # Buffer holding the records

    cdef bint is_empty(self) nogil

    # push/pop return an int status code.
    cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end,
                  SIZE_t pos, SIZE_t depth, bint is_leaf,
                  double improvement, double impurity,
                  double impurity_left, double impurity_right) nogil

    cdef int pop(self, PriorityHeapRecord* res) nogil
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Criterion
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
cdef class Criterion:
    # A criterion evaluates the impurity of a node and how much a split of
    # that node reduces it. It also produces the node's output statistics,
    # e.g. the mean in regression or class probabilities in classification.

    # --- Internal structures -------------------------------------------------
    cdef DOUBLE_t* y                        # Values of y
    cdef SIZE_t y_stride                    # Stride in y (since n_outputs >= 1)
    cdef DOUBLE_t* sample_weight            # Sample weights

    cdef SIZE_t* samples                    # Sample indices in X, y
    cdef SIZE_t start                       # samples[start:pos]: left node samples
    cdef SIZE_t pos                         # samples[pos:end]: right node samples
    cdef SIZE_t end

    cdef SIZE_t n_outputs                   # Number of outputs
    cdef SIZE_t n_node_samples              # Samples in the node (end - start)
    cdef double weighted_n_samples          # Weighted sample count (in total)
    cdef double weighted_n_node_samples     # Weighted sample count in the node
    cdef double weighted_n_left             # Weighted sample count, left node
    cdef double weighted_n_right            # Weighted sample count, right node

    # Invariant kept by the criterion: the left and right statistics it has
    # collected always describe samples[start:pos] and samples[pos:end].

    # --- Methods -------------------------------------------------------------
    cdef void init(self, DOUBLE_t* y, SIZE_t y_stride,
                   DOUBLE_t* sample_weight, double weighted_n_samples,
                   SIZE_t* samples, SIZE_t start, SIZE_t end) nogil
    cdef void reset(self) nogil
    cdef void update(self, SIZE_t new_pos) nogil
    cdef double node_impurity(self) nogil
    cdef void children_impurity(self, double* impurity_left,
                                double* impurity_right) nogil
    cdef void node_value(self, double* dest) nogil
    cdef double impurity_improvement(self, double impurity) nogil
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Splitter
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
cdef struct SplitRecord:
    # Describes one candidate split of the samples.
    SIZE_t feature          # Feature the split is made on.
    SIZE_t pos              # Position at which the samples array is split,
                            # i.e. the count of samples below the threshold
                            # for the feature. pos >= end marks a leaf.
    double threshold        # Threshold value of the split.
    double improvement      # Impurity improvement relative to the parent node.
    double impurity_left    # Impurity of the left part of the split.
    double impurity_right   # Impurity of the right part of the split.
|
||||||
|
|
||||||
|
|
||||||
|
cdef class Splitter:
    # A splitter looks through the input space for a feature and a threshold
    # on which to split the samples samples[start:end].
    #
    # Impurity computations are delegated to a criterion object.

    # --- Internal structures -------------------------------------------------
    cdef public Criterion criterion         # Impurity criterion
    cdef public SIZE_t max_features         # Number of features to test
    cdef public SIZE_t min_samples_leaf     # Min samples in a leaf
    cdef public double min_weight_leaf      # Minimum weight in a leaf

    cdef object random_state                # Random state
    cdef UINT32_t rand_r_state              # sklearn_rand_r random number state

    cdef SIZE_t* samples                    # Sample indices in X, y
    cdef SIZE_t n_samples                   # X.shape[0]
    cdef double weighted_n_samples          # Weighted number of samples
    cdef SIZE_t* features                   # Feature indices in X
    cdef SIZE_t* constant_features          # Constant features indices
    cdef SIZE_t n_features                  # X.shape[1]
    cdef DTYPE_t* feature_values            # Temp. array holding feature values

    cdef SIZE_t start                       # Start position for the current node
    cdef SIZE_t end                         # End position for the current node

    cdef DOUBLE_t* y
    cdef SIZE_t y_stride
    cdef DOUBLE_t* sample_weight

    # The Splitter keeps the `samples` vector arranged so that the samples of
    # any node are contiguous. Given that, `node_split` rearranges the node
    # samples `samples[start:end]` into the two subsets `samples[start:pos]`
    # and `samples[pos:end]`.

    # `features` is a 1-d array of size n_features holding the feature
    # indices; it allows fast sampling of features without replacement.

    # `constant_features` is a 1-d array of size n_features. Its prefix
    # `constant_features[:n_constant_features]` holds the ids of the features
    # whose value is constant over all samples that reached a given node.
    # `n_constant_features` is handed down by the parent node to its child
    # nodes. The remainder `[n_constant_features:]` is left undefined but is
    # preallocated for performance reasons.
    # This allows optimization with depth-based tree building.

    # --- Methods -------------------------------------------------------------
    cdef void init(self, object X, np.ndarray y,
                   DOUBLE_t* sample_weight) except *

    cdef void node_reset(self, SIZE_t start, SIZE_t end,
                         double* weighted_n_node_samples) nogil

    cdef void node_split(self,
                         double impurity,   # Impurity of the node
                         SplitRecord* split,
                         SIZE_t* n_constant_features) nogil

    cdef void node_value(self, double* dest) nogil

    cdef double node_impurity(self) nogil
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Tree
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
cdef struct Node:
    # Storage layout of a single node inside a Tree object.

    SIZE_t left_child                   # Id of the node's left child
    SIZE_t right_child                  # Id of the node's right child
    SIZE_t feature                      # Feature the node is split on
    DOUBLE_t threshold                  # Threshold value at the node
    DOUBLE_t impurity                   # Node impurity (the criterion's value)
    SIZE_t n_node_samples               # Number of samples at the node
    DOUBLE_t weighted_n_node_samples    # Weighted number of samples at the node
|
||||||
|
|
||||||
|
|
||||||
|
cdef class Tree:
    # Binary tree structure constructed by the TreeBuilder. It is the
    # structure used for predictions and for feature importances.

    # --- Input/Output layout -------------------------------------------------
    cdef public SIZE_t n_features       # Number of features in X
    cdef SIZE_t* n_classes              # Number of classes in y[:, k]
    cdef public SIZE_t n_outputs        # Number of outputs in y
    cdef public SIZE_t max_n_classes    # max(n_classes)

    # --- Inner structures ----------------------------------------------------
    # Values live apart from the node structure because their size is only
    # known at runtime.
    cdef public SIZE_t max_depth        # Max depth of the tree
    cdef public SIZE_t node_count       # Counter for node IDs
    cdef public SIZE_t capacity         # Capacity of tree, in terms of nodes
    cdef Node* nodes                    # Array of nodes
    cdef double* value                  # (capacity, n_outputs, max_n_classes) array of values
    cdef SIZE_t value_stride            # = n_outputs * max_n_classes

    # --- Methods -------------------------------------------------------------
    cdef SIZE_t _add_node(self, SIZE_t parent, bint is_left, bint is_leaf,
                          SIZE_t feature, double threshold, double impurity,
                          SIZE_t n_node_samples,
                          double weighted_n_samples) nogil
    cdef void _resize(self, SIZE_t capacity) except *
    cdef int _resize_c(self, SIZE_t capacity=*) nogil

    cdef np.ndarray _get_value_ndarray(self)
    cdef np.ndarray _get_node_ndarray(self)

    cpdef np.ndarray predict(self, object X)
    cpdef np.ndarray apply(self, object X)
    cdef np.ndarray _apply_dense(self, object X)
    cdef np.ndarray _apply_sparse_csr(self, object X)

    cpdef compute_feature_importances(self, normalize=*)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Tree builder
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
cdef class TreeBuilder:
    # Recursively builds a Tree object from training samples. A Splitter
    # object is used to split internal nodes and to assign values to leaves.
    #
    # The builder is in charge of the stopping criteria and of the order in
    # which node splits are evaluated, e.g. depth-first or best-first.

    cdef Splitter splitter              # Splitting algorithm

    cdef SIZE_t min_samples_split       # Minimum number of samples in an internal node
    cdef SIZE_t min_samples_leaf        # Minimum number of samples in a leaf
    cdef double min_weight_leaf         # Minimum weight in a leaf
    cdef SIZE_t max_depth               # Maximal tree depth

    cpdef build(self, Tree tree, object X, np.ndarray y,
                np.ndarray sample_weight=*)
    cdef _check_input(self, object X, np.ndarray y,
                      np.ndarray sample_weight)
|
3701
python/isaac/external/_tree.pyx
vendored
Normal file
3701
python/isaac/external/_tree.pyx
vendored
Normal file
File diff suppressed because it is too large
Load Diff
68
python/isaac/external/_utils.pxd
vendored
Normal file
68
python/isaac/external/_utils.pxd
vendored
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
# Authors: Gilles Louppe <g.louppe@gmail.com>
|
||||||
|
# Peter Prettenhofer <peter.prettenhofer@gmail.com>
|
||||||
|
# Arnaud Joly <arnaud.v.joly@gmail.com>
|
||||||
|
#
|
||||||
|
# Licence: BSD 3 clause
|
||||||
|
|
||||||
|
# See _utils.pyx for details.
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
cimport numpy as np
|
||||||
|
|
||||||
|
ctypedef np.npy_intp SIZE_t # Type for indices and counters
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Stack data structure
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# A record on the stack for depth-first tree growing
|
||||||
|
cdef struct StackRecord:
    # Plain C record; one field per argument of ``Stack.push``.
    SIZE_t start
    SIZE_t end
    SIZE_t depth
    SIZE_t parent
    bint is_left
    double impurity
    SIZE_t n_constant_features
|
||||||
|
|
||||||
|
cdef class Stack:
    # Declaration of the LIFO container implemented in _utils.pyx.

    cdef SIZE_t capacity        # Number of records the buffer can hold
    cdef SIZE_t top             # Number of records currently stored
    cdef StackRecord* stack_    # Buffer holding the records

    cdef bint is_empty(self) nogil

    # push/pop return 0 on success and -1 on failure.
    cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth,
                  SIZE_t parent, bint is_left, double impurity,
                  SIZE_t n_constant_features) nogil

    cdef int pop(self, StackRecord* res) nogil
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# PriorityHeap data structure
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# A record on the frontier for best-first tree growing
|
||||||
|
cdef struct PriorityHeapRecord:
    # Plain C record; one field per argument of ``PriorityHeap.push``.
    SIZE_t node_id
    SIZE_t start
    SIZE_t end
    SIZE_t pos
    SIZE_t depth
    bint is_leaf
    double impurity
    double impurity_left
    double impurity_right
    double improvement      # Key the heap in _utils.pyx is ordered by
|
||||||
|
|
||||||
|
cdef class PriorityHeap:
    # Declaration of the binary max-heap implemented in _utils.pyx.

    cdef SIZE_t capacity                # Number of records the buffer can hold
    cdef SIZE_t heap_ptr                # Number of records currently stored
    cdef PriorityHeapRecord* heap_      # Buffer holding the records

    cdef bint is_empty(self) nogil

    # push/pop return 0 on success and -1 on failure.
    cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end,
                  SIZE_t pos, SIZE_t depth, bint is_leaf,
                  double improvement, double impurity,
                  double impurity_left, double impurity_right) nogil

    cdef int pop(self, PriorityHeapRecord* res) nogil
|
230
python/isaac/external/_utils.pyx
vendored
Normal file
230
python/isaac/external/_utils.pyx
vendored
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
# cython: cdivision=True
|
||||||
|
# cython: boundscheck=False
|
||||||
|
# cython: wraparound=False
|
||||||
|
|
||||||
|
# Authors: Gilles Louppe <g.louppe@gmail.com>
|
||||||
|
# Peter Prettenhofer <peter.prettenhofer@gmail.com>
|
||||||
|
# Arnaud Joly <arnaud.v.joly@gmail.com>
|
||||||
|
#
|
||||||
|
# Licence: BSD 3 clause
|
||||||
|
|
||||||
|
from libc.stdlib cimport free, malloc, realloc
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Stack data structure
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
cdef class Stack:
    """A LIFO data structure.

    Attributes
    ----------
    capacity : SIZE_t
        The number of elements ``self.stack_`` can hold; pushing beyond it
        makes ``push`` grow the buffer.

    top : SIZE_t
        The number of elements currently on the stack.

    stack_ : StackRecord pointer
        The stack of records (upward in the stack corresponds to the right).
    """

    def __cinit__(self, SIZE_t capacity):
        self.capacity = capacity
        self.top = 0
        self.stack_ = <StackRecord*> malloc(capacity * sizeof(StackRecord))
        if self.stack_ == NULL:
            raise MemoryError()

    def __dealloc__(self):
        free(self.stack_)

    cdef bint is_empty(self) nogil:
        """Return true when no record is stored."""
        return self.top <= 0

    cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent,
                  bint is_left, double impurity,
                  SIZE_t n_constant_features) nogil:
        """Push a new element onto the stack.

        Returns 0 if successful; -1 on out of memory error. On failure the
        stack (buffer, ``capacity`` and ``top``) is left unchanged.
        """
        cdef SIZE_t top = self.top
        cdef SIZE_t new_capacity
        cdef StackRecord* stack = NULL

        # Resize if capacity not sufficient
        if top >= self.capacity:
            # Doubling a capacity of 0 would never grow the buffer, so start
            # from one slot in that case.
            if self.capacity > 0:
                new_capacity = 2 * self.capacity
            else:
                new_capacity = 1

            stack = <StackRecord*> realloc(self.stack_,
                                           new_capacity * sizeof(StackRecord))
            if stack == NULL:
                # no free; __dealloc__ handles that. ``capacity`` is not
                # touched, so it keeps matching the still-valid old buffer.
                return -1

            # Commit the new size only once the buffer really has it.
            self.stack_ = stack
            self.capacity = new_capacity

        stack = self.stack_
        stack[top].start = start
        stack[top].end = end
        stack[top].depth = depth
        stack[top].parent = parent
        stack[top].is_left = is_left
        stack[top].impurity = impurity
        stack[top].n_constant_features = n_constant_features

        # Increment stack pointer
        self.top = top + 1
        return 0

    cdef int pop(self, StackRecord* res) nogil:
        """Remove the top element from the stack and copy to ``res``.

        Returns 0 if pop was successful (and ``res`` is set); -1
        otherwise.
        """
        cdef SIZE_t top = self.top
        cdef StackRecord* stack = self.stack_

        if top <= 0:
            return -1

        res[0] = stack[top - 1]
        self.top = top - 1

        return 0
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# PriorityHeap data structure
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
cdef void heapify_up(PriorityHeapRecord* heap, SIZE_t pos) nogil:
    """Sift the record at ``pos`` towards the root.

    The record is swapped with its parent for as long as the parent's
    ``improvement`` is strictly smaller, which restores the max-heap
    ordering on ``improvement`` along the path from ``pos`` upwards.
    """
    cdef SIZE_t child = pos
    cdef SIZE_t parent
    cdef PriorityHeapRecord swapped

    while child != 0:
        parent = (child - 1) / 2

        # Stop as soon as the parent is not smaller than the child.
        if not (heap[parent].improvement < heap[child].improvement):
            break

        swapped = heap[parent]
        heap[parent] = heap[child]
        heap[child] = swapped
        child = parent
|
||||||
|
|
||||||
|
|
||||||
|
cdef void heapify_down(PriorityHeapRecord* heap, SIZE_t pos,
                       SIZE_t heap_length) nogil:
    """Sift the record at ``pos`` towards the leaves.

    At each step the record is swapped with whichever child has the larger
    ``improvement``, if that child's value is strictly larger. This restores
    the max-heap ordering on ``improvement`` from ``pos`` downwards. Only the
    first ``heap_length`` records of ``heap`` are considered.
    """
    cdef SIZE_t current = pos
    cdef SIZE_t left_child
    cdef SIZE_t right_child
    cdef SIZE_t best
    cdef PriorityHeapRecord swapped

    while True:
        left_child = 2 * (current + 1) - 1
        right_child = 2 * (current + 1)
        best = current

        if left_child < heap_length:
            if heap[left_child].improvement > heap[best].improvement:
                best = left_child

        if right_child < heap_length:
            if heap[right_child].improvement > heap[best].improvement:
                best = right_child

        # Neither child is larger: the ordering holds from here on.
        if best == current:
            return

        swapped = heap[current]
        heap[current] = heap[best]
        heap[best] = swapped
        current = best
|
||||||
|
|
||||||
|
|
||||||
|
cdef class PriorityHeap:
    """A priority queue implemented as a binary heap.

    The heap invariant is that the impurity improvement of the parent record
    is larger than or equal to the impurity improvement of the children.

    Attributes
    ----------
    capacity : SIZE_t
        The capacity of the heap

    heap_ptr : SIZE_t
        The water mark of the heap; the heap grows from left to right in the
        array ``heap_``. The following invariant holds
        ``heap_ptr <= capacity``.

    heap_ : PriorityHeapRecord*
        The array of heap records. The maximum element is on the left;
        the heap grows from left to right
    """

    def __cinit__(self, SIZE_t capacity):
        self.capacity = capacity
        self.heap_ptr = 0
        self.heap_ = <PriorityHeapRecord*> malloc(
            capacity * sizeof(PriorityHeapRecord))
        if self.heap_ == NULL:
            raise MemoryError()

    def __dealloc__(self):
        free(self.heap_)

    cdef bint is_empty(self) nogil:
        """Return true when no record is stored."""
        return self.heap_ptr <= 0

    cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos,
                  SIZE_t depth, bint is_leaf, double improvement,
                  double impurity, double impurity_left,
                  double impurity_right) nogil:
        """Push record on the priority heap.

        Returns 0 if successful; -1 on out of memory error. On failure the
        heap (buffer, ``capacity`` and ``heap_ptr``) is left unchanged.
        """
        cdef SIZE_t heap_ptr = self.heap_ptr
        cdef SIZE_t new_capacity
        cdef PriorityHeapRecord* heap = NULL

        # Resize if capacity not sufficient
        if heap_ptr >= self.capacity:
            # Doubling a capacity of 0 would never grow the buffer, so start
            # from one slot in that case.
            if self.capacity > 0:
                new_capacity = 2 * self.capacity
            else:
                new_capacity = 1

            heap = <PriorityHeapRecord*> realloc(
                self.heap_, new_capacity * sizeof(PriorityHeapRecord))
            if heap == NULL:
                # no free; __dealloc__ handles that. ``capacity`` is not
                # touched, so it keeps matching the still-valid old buffer.
                return -1

            # Commit the new size only once the buffer really has it.
            self.heap_ = heap
            self.capacity = new_capacity

        # Put element as last element of heap
        heap = self.heap_
        heap[heap_ptr].node_id = node_id
        heap[heap_ptr].start = start
        heap[heap_ptr].end = end
        heap[heap_ptr].pos = pos
        heap[heap_ptr].depth = depth
        heap[heap_ptr].is_leaf = is_leaf
        heap[heap_ptr].impurity = impurity
        heap[heap_ptr].impurity_left = impurity_left
        heap[heap_ptr].impurity_right = impurity_right
        heap[heap_ptr].improvement = improvement

        # Heapify up
        heapify_up(heap, heap_ptr)

        # Increase element count
        self.heap_ptr = heap_ptr + 1
        return 0

    cdef int pop(self, PriorityHeapRecord* res) nogil:
        """Remove max element from the heap and copy it to ``res``.

        Returns 0 if pop was successful (and ``res`` is set); -1 if the heap
        is empty.
        """
        cdef SIZE_t heap_ptr = self.heap_ptr
        cdef PriorityHeapRecord* heap = self.heap_

        if heap_ptr <= 0:
            return -1

        # Take first element
        res[0] = heap[0]

        # Put last element to the front
        heap[0], heap[heap_ptr - 1] = heap[heap_ptr - 1], heap[0]

        # Restore heap invariant over the remaining heap_ptr - 1 records
        if heap_ptr > 1:
            heapify_down(heap, 0, heap_ptr - 1)

        self.heap_ptr = heap_ptr - 1

        return 0
|
1365
python/isaac/external/tree.py
vendored
Normal file
1365
python/isaac/external/tree.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
BIN
python/isaac/external/tree.pyc
vendored
Normal file
BIN
python/isaac/external/tree.pyc
vendored
Normal file
Binary file not shown.
Reference in New Issue
Block a user