Created
April 30, 2017 22:40
-
-
Save raghavrv/de282b43b2d70e5dc39f995150805036 to your computer and use it in GitHub Desktop.
The new SplitRecord
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cdef struct SplitRecord:
    # Data to track the sample splitting process.
    # This structure also stores the best split found so far.

    # --- Current split stats (compared against the best_* fields below) ---
    SIZE_t feature                 # Which feature to split on.
    # NOTE(review): start/end are presumably the bounds of this node's range
    # in the samples array (cf. the "pos >= end" leaf rule) -- confirm.
    SIZE_t start
    SIZE_t end
    SIZE_t pos                     # Split samples array at the given position,
                                   # i.e. count of samples below threshold for
                                   # feature. pos is >= end if the node is a leaf.
    # NOTE(review): presumably the impurity of the node being split -- confirm.
    double impurity
    double threshold               # Threshold to split at.
    double proxy_improvement       # Proxy for impurity improvement to speed up
                                   # computation times.
    double improvement             # Impurity improvement given parent node.

    # --- Best split so far ---
    # Use these to compare the current split stats with the best so far.
    SIZE_t best_feature
    SIZE_t best_pos
    double best_threshold
    double best_proxy_improvement
    # This will be updated only finally to save some computations.
    double best_improvement

    # --- Stats for the left partition ---
    SIZE_t n_left
    double weighted_n_left

    # --- Stats for the right partition ---
    SIZE_t n_right
    double weighted_n_right
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment