Files
triton/python/external/sklearn/_utils.pyx

231 lines
7.1 KiB
Cython

# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
# Authors: Gilles Louppe <g.louppe@gmail.com>
# Peter Prettenhofer <peter.prettenhofer@gmail.com>
# Arnaud Joly <arnaud.v.joly@gmail.com>
#
# Licence: BSD 3 clause
from libc.stdlib cimport free, malloc, realloc
# =============================================================================
# Stack data structure
# =============================================================================
cdef class Stack:
    """A LIFO data structure backed by a growable C array.

    Attributes
    ----------
    capacity : SIZE_t
        The number of elements the stack can hold; if more are added then
        ``self.stack_`` needs to be resized.

    top : SIZE_t
        The number of elements currently on the stack.

    stack_ : StackRecord pointer
        The stack of records (upward in the stack corresponds to the right).
    """

    def __cinit__(self, SIZE_t capacity):
        # Allocate room for ``capacity`` records up front; raise MemoryError
        # if the allocator returns NULL.
        self.capacity = capacity
        self.top = 0
        self.stack_ = <StackRecord*> malloc(capacity * sizeof(StackRecord))
        if self.stack_ == NULL:
            raise MemoryError()

    def __dealloc__(self):
        # free(NULL) is a no-op, so this is safe even if __cinit__ failed.
        free(self.stack_)

    cdef bint is_empty(self) nogil:
        """Return whether the stack currently holds no records."""
        return self.top <= 0

    cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent,
                  bint is_left, double impurity,
                  SIZE_t n_constant_features) nogil:
        """Push a new element onto the stack.

        Returns 0 if successful; -1 on out of memory error. On failure the
        stack (its buffer, ``capacity`` and ``top``) is left unchanged.
        """
        cdef SIZE_t top = self.top
        cdef SIZE_t new_capacity
        cdef StackRecord* stack = NULL

        # Resize if capacity not sufficient
        if top >= self.capacity:
            # Doubling a zero capacity would never grow the buffer, so fall
            # back to a single slot in that case.
            if self.capacity > 0:
                new_capacity = 2 * self.capacity
            else:
                new_capacity = 1

            stack = <StackRecord*> realloc(self.stack_,
                                           new_capacity * sizeof(StackRecord))
            if stack == NULL:
                # realloc leaves the old buffer intact on failure; keep the
                # old capacity too so that later pushes retry the resize
                # instead of writing past the allocation.
                # no free; __dealloc__ handles that
                return -1

            # Commit the new buffer and its size only once realloc succeeded.
            self.stack_ = stack
            self.capacity = new_capacity

        stack = self.stack_
        stack[top].start = start
        stack[top].end = end
        stack[top].depth = depth
        stack[top].parent = parent
        stack[top].is_left = is_left
        stack[top].impurity = impurity
        stack[top].n_constant_features = n_constant_features

        # Increment stack pointer
        self.top = top + 1
        return 0

    cdef int pop(self, StackRecord* res) nogil:
        """Remove the top element from the stack and copy to ``res``.

        Returns 0 if pop was successful (and ``res`` is set); -1
        otherwise.
        """
        cdef SIZE_t top = self.top
        cdef StackRecord* stack = self.stack_

        if top <= 0:
            return -1

        res[0] = stack[top - 1]
        self.top = top - 1

        return 0
# =============================================================================
# PriorityHeap data structure
# =============================================================================
cdef void heapify_up(PriorityHeapRecord* heap, SIZE_t pos) nogil:
    """Sift the record at ``pos`` towards the root.

    The record is swapped with its parent for as long as the parent's
    ``improvement`` is strictly smaller, which restores the invariant
    parent.improvement > child.improvement along the path above ``pos``.
    """
    cdef SIZE_t child = pos
    cdef SIZE_t parent
    cdef PriorityHeapRecord tmp

    # The root (index 0) has no parent, so the walk ends there.
    while child != 0:
        parent = (child - 1) / 2

        # Stop as soon as the parent is not strictly smaller than the child.
        if not (heap[parent].improvement < heap[child].improvement):
            break

        tmp = heap[parent]
        heap[parent] = heap[child]
        heap[child] = tmp

        child = parent
cdef void heapify_down(PriorityHeapRecord* heap, SIZE_t pos,
                       SIZE_t heap_length) nogil:
    """Sift the record at ``pos`` towards the leaves.

    At each step the record is swapped with whichever child has the larger
    ``improvement``, if that child's value is strictly greater; only indices
    below ``heap_length`` are considered part of the heap. This restores the
    invariant parent.improvement > children.improvement below ``pos``.
    """
    cdef SIZE_t current = pos
    cdef SIZE_t left
    cdef SIZE_t right
    cdef SIZE_t best
    cdef PriorityHeapRecord tmp

    while True:
        left = 2 * current + 1
        right = left + 1
        best = current

        if left < heap_length:
            if heap[left].improvement > heap[best].improvement:
                best = left

        if right < heap_length:
            if heap[right].improvement > heap[best].improvement:
                best = right

        # Neither child beats the current record: the invariant holds.
        if best == current:
            return

        tmp = heap[current]
        heap[current] = heap[best]
        heap[best] = tmp

        current = best
cdef class PriorityHeap:
    """A priority queue implemented as a binary heap.

    The heap invariant is that the impurity improvement of the parent record
    is larger than the impurity improvement of the children.

    Attributes
    ----------
    capacity : SIZE_t
        The capacity of the heap

    heap_ptr : SIZE_t
        The water mark of the heap; the heap grows from left to right in the
        array ``heap_``. The following invariant holds
        ``heap_ptr <= capacity``.

    heap_ : PriorityHeapRecord*
        The array of heap records. The maximum element is on the left;
        the heap grows from left to right
    """

    def __cinit__(self, SIZE_t capacity):
        # Allocate room for ``capacity`` records up front; raise MemoryError
        # if the allocator returns NULL.
        self.capacity = capacity
        self.heap_ptr = 0
        self.heap_ = <PriorityHeapRecord*> malloc(
            capacity * sizeof(PriorityHeapRecord))
        if self.heap_ == NULL:
            raise MemoryError()

    def __dealloc__(self):
        # free(NULL) is a no-op, so this is safe even if __cinit__ failed.
        free(self.heap_)

    cdef bint is_empty(self) nogil:
        """Return whether the heap currently holds no records."""
        return self.heap_ptr <= 0

    cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos,
                  SIZE_t depth, bint is_leaf, double improvement,
                  double impurity, double impurity_left,
                  double impurity_right) nogil:
        """Push record on the priority heap.

        Returns 0 if successful; -1 on out of memory error. On failure the
        heap (its buffer, ``capacity`` and ``heap_ptr``) is left unchanged.
        """
        cdef SIZE_t heap_ptr = self.heap_ptr
        cdef SIZE_t new_capacity
        cdef PriorityHeapRecord* heap = NULL

        # Resize if capacity not sufficient
        if heap_ptr >= self.capacity:
            # Doubling a zero capacity would never grow the buffer, so fall
            # back to a single slot in that case.
            if self.capacity > 0:
                new_capacity = 2 * self.capacity
            else:
                new_capacity = 1

            heap = <PriorityHeapRecord*> realloc(
                self.heap_, new_capacity * sizeof(PriorityHeapRecord))
            if heap == NULL:
                # realloc leaves the old buffer intact on failure; keep the
                # old capacity too so that later pushes retry the resize
                # instead of writing past the allocation.
                # no free; __dealloc__ handles that
                return -1

            # Commit the new buffer and its size only once realloc succeeded.
            self.heap_ = heap
            self.capacity = new_capacity

        # Put element as last element of heap
        heap = self.heap_
        heap[heap_ptr].node_id = node_id
        heap[heap_ptr].start = start
        heap[heap_ptr].end = end
        heap[heap_ptr].pos = pos
        heap[heap_ptr].depth = depth
        heap[heap_ptr].is_leaf = is_leaf
        heap[heap_ptr].impurity = impurity
        heap[heap_ptr].impurity_left = impurity_left
        heap[heap_ptr].impurity_right = impurity_right
        heap[heap_ptr].improvement = improvement

        # Heapify up
        heapify_up(heap, heap_ptr)

        # Increase element count
        self.heap_ptr = heap_ptr + 1
        return 0

    cdef int pop(self, PriorityHeapRecord* res) nogil:
        """Remove max element from the heap and copy it to ``res``.

        Returns 0 if pop was successful (and ``res`` is set); -1 if the
        heap is empty.
        """
        cdef SIZE_t heap_ptr = self.heap_ptr
        cdef PriorityHeapRecord* heap = self.heap_

        if heap_ptr <= 0:
            return -1

        # Take first element
        res[0] = heap[0]

        # Put last element to the front
        heap[0], heap[heap_ptr - 1] = heap[heap_ptr - 1], heap[0]

        # Restore heap invariant over the remaining heap_ptr - 1 records
        if heap_ptr > 1:
            heapify_down(heap, 0, heap_ptr - 1)

        self.heap_ptr = heap_ptr - 1

        return 0