lmassaron · September 20, 2018 05:47 · lmassaron · Sep 20, 2018
diff --git a/heuristic_interaction_detection b/heuristic_interaction_detection
 from sklearn.datasets import load_boston
 from sklearn.ensemble import GradientBoostingRegressor
 from sklearn.ensemble import BaggingRegressor
 import pandas as pd
 import numpy as np
 # Load the Boston housing data: predictors go into a DataFrame with named
 # columns, the target stays as returned by the loader.
 boston = load_boston()

 X = pd.DataFrame(boston['data'], columns=boston['feature_names'])
 y = boston['target']

 # Base learner: 100 boosting stages of depth-3 trees, no row subsampling.
 gbm = GradientBoostingRegressor(n_estimators=100, 
                                subsample=1.0, 
                                max_depth=3)

 # Bag 10 copies of the booster with max_samples=0.7 and max_features=0.7,
 # bootstrap sampling enabled, on a single job.
 bagging = BaggingRegressor(base_estimator=gbm, 
                           n_estimators=10, 
                           max_samples=0.7, 
                           max_features=0.7, 
                           bootstrap=True, 
                           n_jobs=1)
 bagging.fit(X, y)

 # Flatten the ensemble: for every bagged model, take element 0 of each entry
 # in its estimators_ collection.
 # NOTE(review): with max_features=0.7 each bagged model presumably sees only
 # a subset of the columns, so the feature indices stored in these trees may
 # not line up with feature_names -- confirm against
 # bagging.estimators_features_.
 trees = [tree[0] for bagged in bagging.estimators_ for tree in bagged.estimators_]
 feature_names = boston['feature_names']

 from sklearn.tree import _tree
 import itertools
 import sys
 # NOTE(review): 1000 is presumably already CPython's default limit --
 # confirm this call is needed.
 sys.setrecursionlimit(1000)

 # First tree of the flattened list; the counting loop further down rebinds
 # the name `tree`.
 tree = trees[0]

 def filter_repetitions(l):
    """Collapse runs of equal neighbouring items in a sequence.

    Every item is compared with its successor and dropped when the two are
    equal, so the last item of each run survives and the final item is
    always kept. Equal items that are not adjacent are left alone.

    Returns a new list; ``l`` is not modified.
    """
    final_position = len(l) - 1
    kept = []
    for position, item in enumerate(l):
        if position == final_position or item != l[position + 1]:
            kept.append(item)
    return kept

 def recurse(tree, node=0, depth=1, path=None, paths=None):
    """Collect the feature names met on every root-to-leaf path of a tree.

    Parameters
    ----------
    tree : low-level tree structure exposing ``feature``, ``children_left``
        and ``children_right`` indexed by node id (``propose_interactions``
        passes ``tree.tree_``).
    node : id of the node to start from; 0 by default.
    depth : depth of ``node``; passed down the recursion, not otherwise used.
    path : feature names of the splits taken to reach ``node``.
    paths : accumulator that receives one entry per leaf reached.

    Returns
    -------
    list
        ``paths`` passed through ``filter_repetitions``, i.e. with identical
        neighbouring paths collapsed. Each stored path also has consecutive
        repeats of a feature collapsed.
    """
    # Fresh lists per top-level call: list defaults in the signature are
    # created once and shared, which would pile up paths across trees.
    if path is None:
        path = []
    if paths is None:
        paths = []

    split_feature = tree.feature[node]
    if split_feature == _tree.TREE_UNDEFINED:
        # Leaf: the path is complete.
        paths.append(filter_repetitions(path))
    else:
        # Resolve the name from the tree passed in and the module-level
        # ``feature_names``; locals of ``propose_interactions`` are not
        # visible from this function.
        extended = path + [feature_names[split_feature]]
        recurse(tree, tree.children_left[node], depth + 1, extended, paths)
        recurse(tree, tree.children_right[node], depth + 1, extended, paths)
    # Returned on leaves too, so a tree whose root is a leaf still yields a
    # list instead of None.
    return filter_repetitions(paths)
        
 def propose_interactions(tree):
    """Return the feature-name paths of a fitted tree, one per leaf.

    ``tree`` is an estimator exposing its low-level structure as ``tree_``.
    Feature indices are translated through the module-level
    ``feature_names``. Within a path, consecutive repeats of a feature are
    collapsed; in the result, identical neighbouring paths are collapsed
    (both through ``filter_repetitions``).
    """
    tree_ = tree.tree_
    feature_name = [feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" for i in tree_.feature]
    paths = []  # local to this call, so nothing leaks from one tree to the next

    # Nested so it can read ``tree_`` and ``feature_name`` (same layout as
    # ``tree_to_code``); a module-level helper cannot see these locals.
    def walk(node, path):
        if tree_.feature[node] == _tree.TREE_UNDEFINED:
            # Leaf: the path is complete.
            paths.append(filter_repetitions(path))
            return
        extended = path + [feature_name[node]]
        walk(tree_.children_left[node], extended)
        walk(tree_.children_right[node], extended)

    walk(0, [])
    return filter_repetitions(paths)

 def expand_effects(interaction):
    """List every combination of two or more features taken from a path.

    Parameters
    ----------
    interaction : sequence of mutually orderable feature names.

    Returns
    -------
    list of tuple
        Each tuple is sorted and appears once; the list itself is sorted so
        the result is reproducible. A feature occurring twice in
        ``interaction`` counts as two items, so a tuple may contain it
        twice. Fewer than two items give an empty list.
    """
    # Combinations of the sorted input are already sorted tuples, and the set
    # drops duplicates caused by repeated features. This yields the same
    # tuples as sorting every permutation, without the factorial blow-up.
    ordered = sorted(interaction)
    effects = {
        combo
        for size in range(2, len(ordered) + 1)
        for combo in itertools.combinations(ordered, size)
    }
    return sorted(effects)

 def interaction_heuristic(trees):
    """Placeholder: accepts ``trees``, does nothing and returns ``None``."""
    return None

 # Tally how often each expanded effect shows up across all trees, then
 # print the raw counts.
 candidates = {}
 for tree in trees:
    for interaction in propose_interactions(tree):
        for effect in expand_effects(interaction):
            key = tuple(effect)
            candidates[key] = candidates.get(key, 0) + 1

 print(candidates)

 def tree_to_code(tree, feature_names):
    """Print a fitted tree as the source text of a Python function.

    ``tree`` is an estimator exposing its structure as ``tree_``;
    ``feature_names`` maps feature indices to names and also supplies the
    argument list of the printed ``def tree(...)`` header. Split nodes are
    printed as ``if name <= threshold`` / ``else`` branches and leaves as
    ``return <value>``. Nothing is returned.
    """
    structure = tree.tree_
    # One label per node: the split feature's name, or a placeholder for
    # nodes without a split feature.
    node_labels = []
    for feature_index in structure.feature:
        if feature_index != _tree.TREE_UNDEFINED:
            node_labels.append(feature_names[feature_index])
        else:
            node_labels.append("undefined!")
    print("def tree({}):".format(", ".join(feature_names)))

    def emit(node_id, level):
        pad = "  " * level
        if structure.feature[node_id] == _tree.TREE_UNDEFINED:
            # Leaf: print its stored value.
            print("{}return {}".format(pad, structure.value[node_id]))
            return
        label = node_labels[node_id]
        cutoff = structure.threshold[node_id]
        print("{}if {} <= {}:".format(pad, label, cutoff))
        emit(structure.children_left[node_id], level + 1)
        print("{}else:  # if {} > {}".format(pad, label, cutoff))
        emit(structure.children_right[node_id], level + 1)

    emit(0, 1)
    
 # Print the first tree of the flattened list as Python-style source.
 tree_to_code(trees[0], boston['feature_names'])
	from sklearn.datasets import load_boston
	from sklearn.ensemble import GradientBoostingRegressor
	from sklearn.ensemble import BaggingRegressor
	import pandas as pd
	import numpy as np
	# Load the Boston housing data: predictors go into a DataFrame with named
	# columns, the target stays as returned by the loader.
	boston = load_boston()

	X = pd.DataFrame(boston['data'], columns=boston['feature_names'])
	y = boston['target']

	# Base learner: 100 boosting stages of depth-3 trees, no row subsampling.
	gbm = GradientBoostingRegressor(n_estimators=100,
	subsample=1.0,
	max_depth=3)

	# Bag 10 copies of the booster with max_samples=0.7 and max_features=0.7,
	# bootstrap sampling enabled, on a single job.
	bagging = BaggingRegressor(base_estimator=gbm,
	n_estimators=10,
	max_samples=0.7,
	max_features=0.7,
	bootstrap=True,
	n_jobs=1)
	bagging.fit(X, y)

	# Flatten the ensemble: for every bagged model, take element 0 of each
	# entry in its estimators_ collection.
	# NOTE(review): with max_features=0.7 each bagged model presumably sees
	# only a subset of the columns, so the feature indices stored in these
	# trees may not line up with feature_names -- confirm against
	# bagging.estimators_features_.
	trees = [tree[0] for bagged in bagging.estimators_ for tree in bagged.estimators_]
	feature_names = boston['feature_names']

	from sklearn.tree import _tree
	import itertools
	import sys
	# NOTE(review): 1000 is presumably already CPython's default limit --
	# confirm this call is needed.
	sys.setrecursionlimit(1000)

	# First tree of the flattened list; the counting loop further down
	# rebinds the name `tree`.
	tree = trees[0]

	def filter_repetitions(l):
	    """Collapse runs of equal neighbouring items in a sequence.

	    Every item is compared with its successor and dropped when the two
	    are equal, so the last item of each run survives and the final item
	    is always kept. Equal items that are not adjacent are left alone.

	    Returns a new list; ``l`` is not modified.
	    """
	    # Body re-indented: without indentation the function did not parse.
	    final_position = len(l) - 1
	    kept = []
	    for position, item in enumerate(l):
	        if position == final_position or item != l[position + 1]:
	            kept.append(item)
	    return kept

	def recurse(tree, node=0, depth=1, path=None, paths=None):
	    """Collect the feature names met on every root-to-leaf path of a tree.

	    Parameters
	    ----------
	    tree : low-level tree structure exposing ``feature``,
	        ``children_left`` and ``children_right`` indexed by node id
	        (``propose_interactions`` passes ``tree.tree_``).
	    node : id of the node to start from; 0 by default.
	    depth : depth of ``node``; passed down the recursion, not otherwise
	        used.
	    path : feature names of the splits taken to reach ``node``.
	    paths : accumulator that receives one entry per leaf reached.

	    Returns
	    -------
	    list
	        ``paths`` passed through ``filter_repetitions``, i.e. with
	        identical neighbouring paths collapsed. Each stored path also has
	        consecutive repeats of a feature collapsed.
	    """
	    # Fresh lists per top-level call: list defaults in the signature are
	    # created once and shared, which would pile up paths across trees.
	    if path is None:
	        path = []
	    if paths is None:
	        paths = []

	    split_feature = tree.feature[node]
	    if split_feature == _tree.TREE_UNDEFINED:
	        # Leaf: the path is complete.
	        paths.append(filter_repetitions(path))
	    else:
	        # Resolve the name from the tree passed in and the module-level
	        # ``feature_names``; locals of ``propose_interactions`` are not
	        # visible from this function.
	        extended = path + [feature_names[split_feature]]
	        recurse(tree, tree.children_left[node], depth + 1, extended, paths)
	        recurse(tree, tree.children_right[node], depth + 1, extended, paths)
	    # Returned on leaves too, so a tree whose root is a leaf still yields
	    # a list instead of None.
	    return filter_repetitions(paths)

	def propose_interactions(tree):
	    """Return the feature-name paths of a fitted tree, one per leaf.

	    ``tree`` is an estimator exposing its low-level structure as
	    ``tree_``. Feature indices are translated through the module-level
	    ``feature_names``. Within a path, consecutive repeats of a feature
	    are collapsed; in the result, identical neighbouring paths are
	    collapsed (both through ``filter_repetitions``).
	    """
	    tree_ = tree.tree_
	    feature_name = [feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" for i in tree_.feature]
	    paths = []  # local to this call, so nothing leaks between trees

	    # Nested so it can read ``tree_`` and ``feature_name``; a module-level
	    # helper cannot see these locals.
	    def walk(node, path):
	        if tree_.feature[node] == _tree.TREE_UNDEFINED:
	            # Leaf: the path is complete.
	            paths.append(filter_repetitions(path))
	            return
	        extended = path + [feature_name[node]]
	        walk(tree_.children_left[node], extended)
	        walk(tree_.children_right[node], extended)

	    walk(0, [])
	    return filter_repetitions(paths)

	def expand_effects(interaction):
	    """List every combination of two or more features taken from a path.

	    Parameters
	    ----------
	    interaction : sequence of mutually orderable feature names.

	    Returns
	    -------
	    list of tuple
	        Each tuple is sorted and appears once; the list itself is sorted
	        so the result is reproducible. A feature occurring twice in
	        ``interaction`` counts as two items, so a tuple may contain it
	        twice. Fewer than two items give an empty list.
	    """
	    # Combinations of the sorted input are already sorted tuples, and the
	    # set drops duplicates caused by repeated features. This yields the
	    # same tuples as sorting every permutation, without the factorial
	    # blow-up.
	    ordered = sorted(interaction)
	    effects = {
	        combo
	        for size in range(2, len(ordered) + 1)
	        for combo in itertools.combinations(ordered, size)
	    }
	    return sorted(effects)

	def interaction_heuristic(trees):
	    """Placeholder: accepts ``trees``, does nothing and returns ``None``."""
	    # Body re-indented: an unindented ``pass`` left the def without a body.
	    return None

	# Tally how often each expanded effect shows up across all trees, then
	# print the raw counts. Loop bodies are re-indented: without nesting the
	# statements did not parse.
	candidates = {}
	for tree in trees:
	    for interaction in propose_interactions(tree):
	        for effect in expand_effects(interaction):
	            key = tuple(effect)
	            candidates[key] = candidates.get(key, 0) + 1

	print(candidates)

	def tree_to_code(tree, feature_names):
	    """Print a fitted tree as the source text of a Python function.

	    ``tree`` is an estimator exposing its structure as ``tree_``;
	    ``feature_names`` maps feature indices to names and also supplies the
	    argument list of the printed ``def tree(...)`` header. Split nodes are
	    printed as ``if name <= threshold`` / ``else`` branches and leaves as
	    ``return <value>``. Nothing is returned.
	    """
	    # Body re-indented: without indentation the function did not parse.
	    structure = tree.tree_
	    # One label per node: the split feature's name, or a placeholder for
	    # nodes without a split feature.
	    node_labels = []
	    for feature_index in structure.feature:
	        if feature_index != _tree.TREE_UNDEFINED:
	            node_labels.append(feature_names[feature_index])
	        else:
	            node_labels.append("undefined!")
	    print("def tree({}):".format(", ".join(feature_names)))

	    def emit(node_id, level):
	        pad = " " * level
	        if structure.feature[node_id] == _tree.TREE_UNDEFINED:
	            # Leaf: print its stored value.
	            print("{}return {}".format(pad, structure.value[node_id]))
	            return
	        label = node_labels[node_id]
	        cutoff = structure.threshold[node_id]
	        print("{}if {} <= {}:".format(pad, label, cutoff))
	        emit(structure.children_left[node_id], level + 1)
	        print("{}else: # if {} > {}".format(pad, label, cutoff))
	        emit(structure.children_right[node_id], level + 1)

	    emit(0, 1)

	# Print the first tree of the flattened list as Python-style source.
	tree_to_code(trees[0], boston['feature_names'])
No results found