Created
September 20, 2018 05:47
-
-
Save lmassaron/69e1a8a37a7c799bcef2c63901993c18 to your computer and use it in GitHub Desktop.
Extracting decision rules from Scikit-learn tree data-structures
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from sklearn.datasets import load_boston | |
| from sklearn.ensemble import GradientBoostingRegressor | |
| from sklearn.ensemble import BaggingRegressor | |
| import pandas as pd | |
| import numpy as np | |
# Load the Boston housing data: features go into a DataFrame whose columns are
# the dataset's feature names, the target is kept as returned by the loader.
# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases — confirm the pinned version before running this.
boston = load_boston()
X = pd.DataFrame(boston['data'], columns=boston['feature_names'])
y = boston['target']

# Base learner: gradient boosting with 100 depth-3 stages, no row subsampling.
gbm = GradientBoostingRegressor(n_estimators=100,
                                subsample=1.0,
                                max_depth=3)

# Bag 10 copies of the booster, each on 70% of the rows and 70% of the columns.
# NOTE(review): because max_features < 1.0, each bagged model presumably sees
# only a column subset, so the feature indices stored in its trees may not
# line up with boston['feature_names'] — verify against
# bagging.estimators_features_.
# NOTE(review): newer scikit-learn releases name this argument `estimator`
# instead of `base_estimator` — confirm against the installed version.
bagging = BaggingRegressor(base_estimator=gbm,
                           n_estimators=10,
                           max_samples=0.7,
                           max_features=0.7,
                           bootstrap=True,
                           n_jobs=1)
bagging.fit(X, y)

# Flatten the ensemble: for every bagged booster, take element 0 of each entry
# in its estimators_ collection, giving one flat list of fitted trees.
trees = [tree[0] for bagged in bagging.estimators_ for tree in bagged.estimators_]

# Feature names used by the interaction helpers further down the file.
feature_names = boston['feature_names']
| from sklearn.tree import _tree | |
| import itertools | |
| import sys | |
# Pin the interpreter recursion limit explicitly; recurse() and tree_to_code()
# below walk each tree recursively.
sys.setrecursionlimit(1000)
# Keep the first extracted tree under a short name.
tree = trees[0]
def filter_repetitions(l):
    """Collapse runs of equal consecutive items in ``l`` into a single item.

    Only *adjacent* duplicates are removed; an item that reappears later,
    after a different item, is kept. The input is not modified.

    Args:
        l: An indexable sequence with a length.

    Returns:
        A new list holding the last item of every run of equal neighbours.
    """
    kept = []
    final_pos = len(l) - 1
    for pos, item in enumerate(l):
        # The last item always survives; any other item survives only when
        # its right-hand neighbour differs from it.
        if pos == final_pos or item != l[pos + 1]:
            kept.append(item)
    return kept
def recurse(tree, node=0, depth=1, path=None, paths=None):
    """Collect the feature names used along every root-to-leaf path of a tree.

    Walks the tree depth-first. At each split node the name of the splitting
    feature is appended to the current path; at each leaf the path (with
    consecutive repeats collapsed) is recorded.

    Args:
        tree: Low-level tree structure exposing ``feature``, ``children_left``
            and ``children_right`` arrays indexed by node id (assumed to be a
            fitted estimator's ``tree_`` attribute, as passed by
            ``propose_interactions``).
        node: Id of the node to start from; 0 is the root.
        depth: Depth of ``node``. It is only forwarded to the recursive calls
            and kept for interface compatibility.
        path: Feature names seen on the way down to ``node``. Defaults to an
            empty path.
        paths: List that receives one entry per leaf. A fresh list is created
            when omitted, so separate calls never share results.

    Returns:
        The collected paths with consecutive duplicate paths collapsed. The
        same value is returned whether ``node`` is a split or a leaf.
    """
    # Use None sentinels instead of mutable defaults: a default list would be
    # shared by every call and leak paths from one tree into the next.
    path = [] if path is None else path
    paths = [] if paths is None else paths

    # Read everything from the `tree` argument and the module-level
    # `feature_names`, rather than from names local to another function.
    feature_index = tree.feature[node]
    if feature_index != _tree.TREE_UNDEFINED:
        # `path + [...]` builds a new list, so sibling branches never see
        # each other's additions.
        extended = path + [feature_names[feature_index]]
        recurse(tree, tree.children_left[node], depth + 1, extended, paths)
        recurse(tree, tree.children_right[node], depth + 1, extended, paths)
    else:
        paths.append(filter_repetitions(path))
    return filter_repetitions(paths)
def propose_interactions(tree):
    """Return the candidate feature interactions suggested by one fitted tree.

    Each candidate is the list of feature names met on a root-to-leaf path,
    as collected by ``recurse``.

    Args:
        tree: Fitted tree estimator exposing the low-level structure as
            ``tree.tree_``.

    Returns:
        A list of paths (each a list of feature names), with consecutive
        duplicate paths collapsed.
    """
    tree_ = tree.tree_
    # Hand recurse() a fresh accumulator so paths from a previously processed
    # tree can never leak into this result, and read the result from that
    # accumulator so a single-leaf tree yields a list rather than None.
    collected = []
    recurse(tree_, paths=collected)
    return filter_repetitions(collected)
def expand_effects(interaction):
    """Expand an interaction into all of its sub-interactions of size >= 2.

    Args:
        interaction: List of mutually comparable items (feature names). An
            item that occurs more than once is treated as distinct entries,
            so a result such as ``('a', 'a')`` is possible.

    Returns:
        A sorted list of unique tuples, each tuple being a sorted selection
        of two or more items from ``interaction``. Empty when ``interaction``
        has fewer than two items.
    """
    effects = set()
    # Selections of size 0 or 1 are not interactions, so start at pairs.
    for size in range(2, len(interaction) + 1):
        # Every selection is sorted before it is stored, so the order in
        # which items are picked is irrelevant: combinations yields exactly
        # the same set as permutations without the factorial enumeration.
        for subset in itertools.combinations(interaction, size):
            effects.add(tuple(sorted(subset)))
    # Sort so the output order does not depend on set/hash ordering.
    return sorted(effects)
def interaction_heuristic(trees):
    """Count how often each candidate feature interaction occurs in ``trees``.

    For every tree, the interactions proposed by ``propose_interactions`` are
    expanded with ``expand_effects`` and each resulting effect is tallied.

    Args:
        trees: Iterable of fitted tree estimators.

    Returns:
        A dict mapping each effect (a tuple of feature names) to the number
        of times it was proposed across all trees.
    """
    counts = dict()
    for fitted_tree in trees:
        for interaction in propose_interactions(fitted_tree):
            for effect in expand_effects(interaction):
                key = tuple(effect)
                counts[key] = counts.get(key, 0) + 1
    return counts


# Tally the interactions over every extracted tree and show the result.
# Running the loop inside the function keeps its loop variables from
# overwriting module-level names such as `tree`.
candidates = interaction_heuristic(trees)
print(candidates)
def tree_to_code(tree, feature_names):
    """Print a fitted decision tree as nested if/else pseudo-code.

    The output starts with a ``def tree(...)`` header listing every feature
    name, followed by one ``if``/``else`` pair per split node and one
    ``return`` line per leaf, indented by node depth.

    Args:
        tree: Fitted tree estimator exposing the low-level structure as
            ``tree.tree_``.
        feature_names: Sequence of feature names, indexed by the feature ids
            stored in the tree.

    Returns:
        None. Everything is written to standard output.
    """
    fitted = tree.tree_
    undefined = _tree.TREE_UNDEFINED

    # One label per node: the splitting feature's name, or a placeholder for
    # nodes that do not split on any feature.
    node_labels = []
    for feature_idx in fitted.feature:
        if feature_idx == undefined:
            node_labels.append("undefined!")
        else:
            node_labels.append(feature_names[feature_idx])

    print("def tree({}):".format(", ".join(feature_names)))

    def emit(node, depth):
        """Print the subtree rooted at ``node``, indented for ``depth``."""
        pad = " " * depth
        if fitted.feature[node] == undefined:
            # Leaf: emit the stored value and stop descending.
            print("{}return {}".format(pad, fitted.value[node]))
            return
        label = node_labels[node]
        cutoff = fitted.threshold[node]
        print("{}if {} <= {}:".format(pad, label, cutoff))
        emit(fitted.children_left[node], depth + 1)
        print("{}else: # if {} > {}".format(pad, label, cutoff))
        emit(fitted.children_right[node], depth + 1)

    emit(0, 1)
# Print the first extracted tree as nested if/else pseudo-code.
tree_to_code(trees[0], boston['feature_names'])
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://projecteuclid.org/download/pdfview_1/euclid.aoas/1223908046
FRIEDMAN, Jerome H., et al. Predictive learning via rule ensembles. The Annals of Applied Statistics, 2008, 2.3: 916-954.