Skip to content

Instantly share code, notes, and snippets.

@lmassaron
Created September 20, 2018 05:47
Show Gist options
  • Select an option

  • Save lmassaron/69e1a8a37a7c799bcef2c63901993c18 to your computer and use it in GitHub Desktop.

Select an option

Save lmassaron/69e1a8a37a7c799bcef2c63901993c18 to your computer and use it in GitHub Desktop.
Extracting decision rules from Scikit-learn tree data-structures
from sklearn.datasets import load_boston
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import BaggingRegressor
import pandas as pd
import numpy as np
# Load the Boston housing data: predictors as a labelled DataFrame, target as y.
# NOTE(review): load_boston appears to have been removed from recent
# scikit-learn releases -- confirm the version this script is pinned to.
boston = load_boston()
feature_names = boston["feature_names"]
X = pd.DataFrame(boston["data"], columns=boston["feature_names"])
y = boston["target"]

# A bagging ensemble whose base learner is itself a gradient-boosted ensemble
# of depth-3 trees.
# NOTE(review): newer scikit-learn spells `base_estimator` as `estimator` --
# confirm against the installed version.
# NOTE(review): with max_features=0.7 each bagged model is presumably fitted on
# a random subset of the columns, so the feature indices stored in its trees
# may not line up with `feature_names`; verify against
# `bagging.estimators_features_`.
gbm = GradientBoostingRegressor(
    n_estimators=100,
    subsample=1.0,
    max_depth=3,
)
bagging = BaggingRegressor(
    base_estimator=gbm,
    n_estimators=10,
    max_samples=0.7,
    max_features=0.7,
    bootstrap=True,
    n_jobs=1,
)
bagging.fit(X, y)

# Flatten the ensemble: take element 0 of every entry in each bagged model's
# `estimators_`.
trees = [
    stage[0]
    for boosted in bagging.estimators_
    for stage in boosted.estimators_
]
from sklearn.tree import _tree
import itertools
import sys
# Set the interpreter recursion limit explicitly; the tree walkers defined
# below are recursive.
sys.setrecursionlimit(1000)
# Keep a module-level handle on the first extracted tree.
tree = trees[0]
def filter_repetitions(l):
    """Collapse every run of consecutive equal items in ``l`` to one item.

    Only *adjacent* duplicates are dropped; an item that reappears later,
    after a different item, is kept again (``[1, 1, 2, 1] -> [1, 2, 1]``).

    Args:
        l: Any iterable whose items support ``!=``.

    Returns:
        A new list holding the last item of each run, in original order.
        An empty input yields an empty list.
    """
    items = list(l)
    if not items:
        return []
    # An item survives when its successor differs from it ...
    kept = [cur for cur, nxt in zip(items, items[1:]) if cur != nxt]
    # ... and the final item has no successor, so it always survives.
    kept.append(items[-1])
    return kept
def _walk_paths(tree, node, path, paths, feature_name):
    """Append to ``paths`` the feature path of every leaf below ``node``."""
    if tree.feature[node] == _tree.TREE_UNDEFINED:
        # Leaf: record the split features seen on the way down, with
        # immediately repeated features collapsed.
        paths.append(filter_repetitions(path))
        return
    extended = path + [feature_name[node]]
    _walk_paths(tree, tree.children_left[node], extended, paths, feature_name)
    _walk_paths(tree, tree.children_right[node], extended, paths, feature_name)


def recurse(tree, node=0, depth=1, path=None, paths=None, feature_name=None):
    """Collect the split-feature names along every root-to-leaf path.

    Args:
        tree: Low-level tree structure exposing ``feature``,
            ``children_left`` and ``children_right``, each indexed by node id.
        node: Id of the node to start the walk from.
        depth: Accepted for backward compatibility; not used by the walk.
        path: Feature names already met above ``node`` (defaults to empty).
        paths: List the leaf paths are appended to. A fresh list is used when
            omitted, so results no longer leak from one call into the next.
        feature_name: Feature name for each node id. When omitted it is built
            from the module-level ``feature_names``.

    Returns:
        The list of leaf paths (each a list of feature names) with
        consecutive identical paths collapsed. A tree that is a single leaf
        yields ``[[]]``.
    """
    path = [] if path is None else path
    paths = [] if paths is None else paths
    if feature_name is None:
        feature_name = [
            feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
            for i in tree.feature
        ]
    _walk_paths(tree, node, path, paths, feature_name)
    return filter_repetitions(paths)


def propose_interactions(tree):
    """Return the feature paths of a fitted tree as candidate interactions.

    Args:
        tree: Fitted estimator exposing its low-level structure as ``tree_``.

    Returns:
        A list of lists of feature names, one per leaf path, as produced by
        ``recurse``. Names are looked up in the module-level
        ``feature_names``.
    """
    tree_ = tree.tree_
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]
    return recurse(tree_, feature_name=feature_name)
def expand_effects(interaction):
    """List every sub-interaction of two or more features in ``interaction``.

    Each effect is a tuple of feature names in sorted order, so the same
    features always produce the same tuple whatever order they were met in.

    Args:
        interaction: Sequence of sortable feature names.

    Returns:
        A list of unique tuples (length >= 2), in no particular order.
        Inputs with fewer than two items yield an empty list.
    """
    # Sorting once and taking combinations yields exactly the set obtained by
    # sorting every permutation, without enumerating the permutations.
    ordered = sorted(interaction)
    effects = set()
    for size in range(2, len(ordered) + 1):
        effects.update(itertools.combinations(ordered, size))
    return list(effects)
def interaction_heuristic(trees):
    """Count how often each candidate feature interaction occurs in ``trees``.

    For every tree, each path returned by ``propose_interactions`` is expanded
    with ``expand_effects`` and every resulting effect is tallied once.

    Args:
        trees: Iterable of fitted trees accepted by ``propose_interactions``.

    Returns:
        A dict mapping each effect (a tuple of feature names) to the number
        of times it was produced across all trees.
    """
    candidates = {}
    for tree in trees:
        for interaction in propose_interactions(tree):
            for effect in expand_effects(interaction):
                key = tuple(effect)
                candidates[key] = candidates.get(key, 0) + 1
    return candidates


# Tally the interactions over the whole ensemble and show the raw counts.
candidates = interaction_heuristic(trees)
print(candidates)
def tree_to_code(tree, feature_names):
    """Print a fitted decision tree as the source of a Python function.

    The printed function is named ``tree`` and takes every entry of
    ``feature_names`` as an argument. Each split becomes an ``if``/``else``
    on ``feature <= threshold`` and each leaf a ``return`` of that node's
    stored value.

    Args:
        tree: Fitted estimator exposing its low-level structure as ``tree_``.
        feature_names: Feature names, indexed by the feature ids stored in
            the tree.
    """
    structure = tree.tree_
    undefined = _tree.TREE_UNDEFINED

    # Resolve the split feature of each node to its name once, up front.
    split_names = []
    for feature_id in structure.feature:
        if feature_id == undefined:
            split_names.append("undefined!")
        else:
            split_names.append(feature_names[feature_id])

    signature = ", ".join(feature_names)
    print("def tree({}):".format(signature))

    def emit(node, depth):
        """Print the subtree rooted at ``node``, indented for ``depth``."""
        indent = " " * depth

        # Leaf: nothing left to test, return the stored value.
        if structure.feature[node] == undefined:
            print("{}return {}".format(indent, structure.value[node]))
            return

        name = split_names[node]
        threshold = structure.threshold[node]
        print("{}if {} <= {}:".format(indent, name, threshold))
        emit(structure.children_left[node], depth + 1)
        print("{}else: # if {} > {}".format(indent, name, threshold))
        emit(structure.children_right[node], depth + 1)

    emit(0, 1)
# Demo: print the first extracted tree as Python source.
tree_to_code(trees[0], boston['feature_names'])
@lmassaron
Copy link
Copy Markdown
Author

https://projecteuclid.org/download/pdfview_1/euclid.aoas/1223908046

FRIEDMAN, Jerome H., et al. Predictive learning via rule ensembles. The Annals of Applied Statistics, 2008, 2.3: 916-954.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment