Skip to content

Instantly share code, notes, and snippets.

@ashenfad
Last active December 22, 2015 12:19
Show Gist options
  • Select an option

  • Save ashenfad/6471887 to your computer and use it in GitHub Desktop.

Select an option

Save ashenfad/6471887 to your computer and use it in GitHub Desktop.
BigML Tree - Iris (TreeMap)

A tree mapping of a BigML decision tree built on the iris dataset.

The outermost rectangle represents the root of the tree. Each inner rectangle represents a child of the node drawn by the rectangle that encloses it. The number of training instances captured by a node determines its size.

<!DOCTYPE html>
<meta charset="utf-8">
<style>
/* Page frame: fixed-width, centered column that anchors the absolutely
   positioned info panels below. */
body {
  font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
  margin: auto;
  position: relative;
  width: 960px;
  background: #fff;
}

/* Color-mode radio buttons, pinned to the top-right corner. */
#color-controls {
  font: 14px sans-serif;
  position: absolute;
  right: 10px;
  top: 10px;
  padding: 3px;
}
#color-controls div {
  padding: 4px;
}

/* Details of the hovered node, pinned to the top-left corner. */
#hover-info {
  font: 14px sans-serif;
  position: absolute;
  left: 10px;
  top: 10px;
}

/* Per-field constraint summary for the hovered node, pinned bottom-left.
   An earlier `font: 14px sans-serif` declaration in this rule was always
   overridden by the 12px one, so only the effective value is kept. */
#summary-info {
  position: absolute;
  left: 10px;
  bottom: 10px;
  font: 12px sans-serif;
}
#summary-info div {
  padding: 2px;
}

/* Heading line of the hover panel showing the node's split predicate. */
.split-predicate {
  font-weight: bold;
  border-bottom: 1px solid #DFDFDF;
  padding: 7px;
}

/* Table of prediction / confidence / count values in the hover panel. */
.node-info {
  margin-top: 10px;
}
.node-info td {
  padding: 2px 7px 2px;
}
</style>
<body>
<!-- Radio group choosing which node attribute drives the rectangle fill.
     Each label is tied to its input via for/id so the text is clickable. -->
<div id="color-controls">
  <form>
    <div>
      <input type="radio" name="mode" id="mode-prediction" value="prediction" checked>
      <label for="mode-prediction">Prediction</label>
    </div>
    <div>
      <input type="radio" name="mode" id="mode-confidence" value="confidence">
      <!-- The script rewrites this label's text for regression trees. -->
      <label id="cnf" for="mode-confidence">Confidence</label>
    </div>
    <div>
      <input type="radio" name="mode" id="mode-split" value="split">
      <label for="mode-split">Split Field</label>
    </div>
  </form>
</div>
<!-- Filled by the mouseover handler and emptied again on mouseout. -->
<div id="hover-info"></div>
<div id="summary-info"></div>
</body>
<script src="http://d3js.org/d3.v3.min.js"></script>
<script>
// Pixel dimensions of the SVG canvas the treemap is drawn into.
var width = 960;
var height = 600;
/**
 * Pick the fill for a node, brightening it while the node is flagged as hovered.
 *
 * @param {Object} d - node datum; its `hover` property is read.
 * @param {*} color - base color, returned untouched when `d.hover` is falsy.
 * @returns {*} `color` itself, or `d3.rgb(color).brighter(0.66)` when hovered.
 */
function hover_adjust(d, color) {
  if (!d.hover) {
    return color;
  }
  return d3.rgb(color).brighter(0.66);
}
// Treemap layout; each node's value is taken from its `count` property.
var treemap = d3.layout.treemap()
    .value(function(node) { return node.count; })
    .sticky(true)
    .size([width - 50, height - 190])
    .padding(10);

// Create the <svg> canvas, then keep a reference to the translated <g>
// inside it; everything drawn later is appended to that group.
var svg = d3.select("body")
    .append("svg:svg")
    .attr("height", height)
    .attr("width", width)
    .append("svg:g")
    .attr("transform", "translate(20,120)");
/**
 * Recursively find the smallest and largest value of one attribute in a tree.
 *
 * All intermediate values are declared locally: the previous version assigned
 * to undeclared `minmaxs`, `min` and `max`, which leaked globals in sloppy mode
 * and throws a ReferenceError in strict mode / ES modules.
 *
 * @param {Object} node - tree node; may carry a `children` array of nodes.
 * @param {string} attr - name of the property read from every node.
 * @returns {{min: *, max: *}} extremes of `attr` over `node` and its descendants.
 */
function find_minmax(node, attr) {
  // Leaf: the node's own value is both extremes.
  if (!node.children) {
    return {"min": node[attr], "max": node[attr]};
  }
  var child_ranges = node.children.map(function (child) {
    return find_minmax(child, attr);
  });
  var lowest = Math.min.apply(null, child_ranges.map(function (range) { return range.min; }));
  var highest = Math.max.apply(null, child_ranges.map(function (range) { return range.max; }));
  // Fold in this node's own value alongside its descendants'.
  return {"min": Math.min(lowest, node[attr]), "max": Math.max(highest, node[attr])};
}
// Load the model JSON, then draw the treemap and wire up its interactions.
d3.json("iris-model.json", function(error, root) {
  // Without this guard a failed request surfaces as a TypeError on `root.model`.
  if (error || !root) {
    console.error("Could not load iris-model.json", error);
    return;
  }

  var model = root.model;
  // A categorical objective field means classification; anything else is
  // treated as regression.
  var model_type = model.model_fields[root.objective_field].optype === "categorical" ?
      "classification" : "regression";

  // Value ranges used as the domains of the linear color scales below.
  var minmaxs = {};
  if (model_type === "classification") {
    minmaxs.confidence = find_minmax(model.root, "confidence");
  } else {
    // Hacky label switch for regression trees
    document.getElementById("cnf").innerHTML = "Expected Error";
    minmaxs.expected_error = find_minmax(model.root, "confidence");
    minmaxs.output = find_minmax(model.root, "output");
  }

  var scale_pred = model_type === "classification" ?
      d3.scale.category10() :
      d3.scale.linear().domain([minmaxs.output.min,
                                minmaxs.output.max])
                       .range(["#222", "#2ee"]);

  // For regression the domain is reversed (max first), so the largest
  // expected error maps to the first range color.
  var scale_conf = model_type === "classification" ?
      d3.scale.linear().domain([minmaxs.confidence.min,
                                minmaxs.confidence.max])
                       .range(["#d33", "#3d3"]) :
      d3.scale.linear().domain([minmaxs.expected_error.max,
                                minmaxs.expected_error.min])
                       .range(["#d33", "#3d3"]);

  var scale_split = d3.scale.category20b();

  // One fill function per radio-button value.
  var color_lookup = {
    "prediction": function(d) { return hover_adjust(d, scale_pred(d.output)); },
    "confidence": function(d) { return hover_adjust(d, scale_conf(d.confidence)); },
    "split": function(d) { return hover_adjust(d, scale_split(d.predicate.field)); }
  };
  var color_fn = color_lookup["prediction"];

  // One translated <g> per tree node, each holding a single <rect>.
  // `path` is the selection of those rects.
  var path = svg.selectAll("g")
      .data(treemap.nodes(root.model.root))
    .enter().append("svg:g")
      .attr("class", "cell")
      .attr("transform", function(d) { return "translate(" + d.x + "," + d.y + ")"; })
    .append("svg:rect")
      .attr("width", function(d) { return d.dx; })
      .attr("height", function(d) { return d.dy; })
      .style("fill", color_fn)
      .style("stroke", "#333")
      .on("mouseover", mouseover)
      .on("mouseout", mouseout);

  d3.selectAll("input").on("change", change);

  // Radio-button handler: switch the active fill function and recolor.
  function change() {
    color_fn = color_lookup[this.value];
    path.transition().duration(250).style("fill", color_fn);
  }

  // Show the hovered node's details and highlight it together with its ancestors.
  function mouseover(d) {
    var split = d.predicate;
    // When the predicate has no `field` the node is labelled as the root.
    var split_msg = split.field ? model.model_fields[split.field].name + " " +
                                  split.operator + " " + split.value
                                : "Tree Root";
    var conf_msg = {"classification": "Confidence", "regression": "Expected Error"};

    var hover = d3.select("#hover-info");
    hover.append("div").attr("class", "split-predicate").text(split_msg);
    var tbody = hover.append("table").attr("class", "node-info").append("tbody");
    var output = model_type === "classification" ? d.output : parseFloat(d.output.toFixed(3));
    table_add(tbody, "Prediction", output);
    table_add(tbody, conf_msg[model_type], parseFloat(d.confidence.toFixed(3)));
    table_add(tbody, "Count", d.count);

    mark_hover(d, true);
    path.style("fill", color_fn);

    // One line per field constrained on the way from the root to this node.
    var summ_doc = d3.select("#summary-info");
    var summaries = summarize(d);
    Object.keys(summaries).forEach(function (id) {
      var fs = summaries[id];
      var msg = model.model_fields[id].name;
      if (isNum(fs.min)) {
        msg = parseFloat(fs.min.toFixed(3)) + " < " + msg;
      }
      if (isNum(fs.max)) {
        msg += " <= " + parseFloat(fs.max.toFixed(3));
      }
      if (fs.eq) {
        msg += " = " + fs.eq;
      } else if (fs.not_eq) {
        // Only own keys are listed, so a separator can never dangle.
        msg += " !=" + Object.keys(fs.not_eq).map(function (category) {
          return " " + category;
        }).join("|");
      }
      summ_doc.append("div").text(msg);
    });
  }

  // Clear both info panels and remove the highlight.
  function mouseout(d) {
    d3.select("#hover-info").html("");
    d3.select("#summary-info").html("");
    mark_hover(d, false);
    path.style("fill", color_fn);
  }

  // Set the `hover` flag on a node and on every ancestor up to the root.
  function mark_hover (d, val) {
    if (d.parent) { mark_hover(d.parent, val); }
    d.hover = val;
  }
});
// Set the CSS height of the element that embeds this window (self.frameElement)
// to the chart height. This runs immediately, before the JSON request completes.
d3.select(self.frameElement).style("height", height + "px");
/**
 * Report whether a value is a finite number or a string that converts to one.
 *
 * @param {*} n - value to inspect.
 * @returns {boolean} true only when `parseFloat(n)` is not NaN and
 *     `isFinite(n)` holds.
 */
function isNum(n) {
  if (isNaN(parseFloat(n))) {
    return false;
  }
  return isFinite(n);
}
/**
 * Collapse the split predicates on the path from the root to `node` into one
 * constraint record per field.
 *
 * Each record may hold `min` (from ">" splits), `max` (from "<=" splits),
 * `eq` (from "=" splits) and `not_eq` (a set-like object of values from "!="
 * splits). Repeated numeric splits on a field keep the tightest bound.
 *
 * The bounds are now computed in local variables: the previous version
 * assigned to undeclared `max` / `min`, which leaked globals in sloppy mode
 * and throws a ReferenceError in strict mode / ES modules.
 *
 * @param {Object} node - tree node with `predicate` and, below the root, `parent`.
 * @returns {Object} map from field id to its constraint record; `{}` for a
 *     node without a parent.
 */
function summarize (node) {
  var pred = node.predicate;
  if (!node.parent) {
    return {};
  }

  // Start from the ancestors' constraints, then apply this node's predicate.
  var summary = summarize(node.parent);
  switch (pred.operator) {
    case "<=":
      if (summary[pred.field]) {
        var old_max = summary[pred.field].max;
        // Keep the smaller upper bound when one already exists.
        var new_max = isNum(old_max) ? Math.min(pred.value, old_max) : pred.value;
        summary[pred.field].max = new_max;
      } else {
        summary[pred.field] = {"max": pred.value};
      }
      break;
    case ">":
      if (summary[pred.field]) {
        var old_min = summary[pred.field].min;
        // Keep the larger lower bound when one already exists.
        var new_min = isNum(old_min) ? Math.max(pred.value, old_min) : pred.value;
        summary[pred.field].min = new_min;
      } else {
        summary[pred.field] = {"min": pred.value};
      }
      break;
    case "=":
      // An equality replaces whatever was recorded for the field so far.
      summary[pred.field] = {"eq": pred.value};
      break;
    case "!=":
      if (!summary[pred.field]) {
        summary[pred.field] = {};
      }
      if (!summary[pred.field].not_eq) {
        summary[pred.field].not_eq = {};
      }
      summary[pred.field].not_eq[pred.value] = true;
      break;
  }
  return summary;
}
/**
 * Append a two-cell row (label, value) to a table selection.
 *
 * @param {Object} table - selection-like object the row is appended to.
 * @param {*} field - text of the first cell.
 * @param {*} val - text of the second cell.
 * @returns {Object} the appended row.
 */
function table_add (table, field, val) {
  var row = table.append("tr");
  [field, val].forEach(function (cell_text) {
    row.append("td").text(cell_text);
  });
  return row;
}
</script>
{"public_dataset": false, "code": 200, "locale": "en-US", "short_url": "", "image": "gallery/model.png", "job_type": 400, "private": true, "dataset": "dataset/514bd86f035d07049200003f", "fields_meta": {"count": 5, "total": 5, "limit": 1000, "offset": 0}, "white_box": false, "dataset_id": "514bd86f035d07049200003f", "randomize": false, "id": "514fb3d00c0b5e04c3000093", "number_of_predictions": 0, "category": 0, "modified_fields": {}, "rows": 150, "out_of_bag": false, "ordering": 0, "fields_to_show": [{"optype": "numeric", "is_objective": false, "name": "sepal length", "datatype_name": "Numeric", "tooltip": "Numeric", "stats": [["Minimum", "4.30"], ["Mean", "5.84"], ["Median", "5.78"], ["Maximum", "7.90"], ["Std dev", "0.83"]], "id": "000000", "css": "numeric type_label datatype-tooltip"}, {"optype": "numeric", "is_objective": false, "name": "sepal width", "datatype_name": "Numeric", "tooltip": "Numeric", "stats": [["Minimum", "2.00"], ["Mean", "3.06"], ["Median", "3.02"], ["Maximum", "4.40"], ["Std dev", "0.44"]], "id": "000001", "css": "numeric type_label datatype-tooltip"}, {"optype": "numeric", "is_objective": false, "name": "petal length", "datatype_name": "Numeric", "tooltip": "Numeric", "stats": [["Minimum", "1.00"], ["Mean", "3.76"], ["Median", "4.34"], ["Maximum", "6.90"], ["Std dev", "1.77"]], "id": "000002", "css": "numeric type_label datatype-tooltip"}, {"optype": "numeric", "is_objective": false, "name": "petal width", "datatype_name": "Numeric", "tooltip": "Numeric", "stats": [["Minimum", "0.10"], ["Mean", "1.20"], ["Median", "1.33"], ["Maximum", "2.50"], ["Std dev", "0.76"]], "id": "000003", "css": "numeric type_label datatype-tooltip"}, {"optype": "categorical", "is_objective": true, "name": "species", "datatype_name": "Categorical", "tooltip": "Categorical", "id": "000004", "css": "categorical type_label datatype-tooltip"}], "views": 0, "size": 4608, "range": [1, 150], "credits_per_prediction": 0.0, "source": "source/514bcf9d0c0b5e3fd100026b", 
"number_of_public_predictions": 0, "sample_rate": 1.0, "objective_fields": ["000004"], "user_name": "ashenfad", "columns": 5, "selective_pruning": false, "status": {"progress": 1.0, "message": "The model has been created", "code": 5, "elapsed": 89}, "updated": "2013-03-25T02:17:52.927000", "description": "", "tags": [], "price": 0.0, "excluded_fields": [], "cloned": false, "credits": 0.017578125, "stat_pruning": true, "objective_field": "000004", "clones": 0, "resource": "model/514fb3d00c0b5e04c3000093", "name": "Iris Model", "created": "Mon, 25 Mar 2013 02:17:52 +0000", "url": "", "dataset_status": true, "source_status": true, "number_of_evaluations": 0, "max_columns": 5, "max_rows": 150, "input_fields": ["000000", "000001", "000002", "000003"], "published": null, "source_id": "514bcf9d0c0b5e3fd100026b", "model": {"kind": "stree", "missing_strategy": "Last prediction", "importance": [["000002", 0.53556], ["000003", 0.46444], ["000000", 0], ["000001", 0]], "fields": {"000004": {"optype": "categorical", "name": "species", "datatype": "string", "preferred": true, "summary": {"missing_count": 0, "categories": [["Iris-versicolor", 50], ["Iris-setosa", 50], ["Iris-virginica", 50]]}, "column_number": 4, "order": 4}, "000002": {"optype": "numeric", "name": "petal length", "datatype": "double", "preferred": true, "summary": {"sum_squares": 2582.71, "splits": [1.25138, 1.32426, 1.37171, 1.40962, 1.44567, 1.48173, 1.51859, 1.56301, 1.6255, 1.74645, 3.23033, 3.675, 3.94203, 4.0469, 4.18243, 4.34142, 4.45309, 4.51823, 4.61771, 4.72566, 4.83445, 4.93363, 5.03807, 5.1064, 5.20938, 5.43979, 5.5744, 5.6646, 5.81496, 6.02913, 6.38125], "missing_count": 0, "sum": 563.7, "median": 4.34142, "maximum": 6.9, "minimum": 1, "standard_deviation": 1.7653, "variance": 3.11628, "population": 150, "bins": [[1, 1], [1.1, 1], [1.2, 2], [1.3, 7], [1.4, 13], [1.5, 13], [1.63636, 11], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], 
[4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.46, 5], [5.6, 6], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "mean": 3.758}, "column_number": 2, "order": 2}, "000003": {"optype": "numeric", "name": "petal width", "datatype": "double", "preferred": true, "summary": {"sum_squares": 302.33, "missing_count": 0, "sum": 179.9, "median": 1.32848, "maximum": 2.5, "minimum": 0.1, "standard_deviation": 0.76224, "variance": 0.58101, "counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "population": 150, "mean": 1.19933}, "column_number": 3, "order": 3}, "000000": {"optype": "numeric", "name": "sepal length", "datatype": "double", "preferred": true, "summary": {"sum_squares": 5223.85, "splits": [4.51526, 4.67252, 4.81113, 4.89582, 4.96139, 5.01131, 5.05992, 5.11148, 5.18177, 5.35681, 5.44129, 5.5108, 5.58255, 5.65532, 5.71658, 5.77889, 5.85381, 5.97078, 6.05104, 6.13074, 6.23023, 6.29578, 6.35078, 6.41459, 6.49383, 6.63013, 6.70719, 6.79218, 6.92597, 7.20423, 7.64746], "missing_count": 0, "sum": 876.5, "median": 5.77889, "maximum": 7.9, "minimum": 4.3, "standard_deviation": 0.82807, "variance": 0.68569, "population": 150, "bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.7, 2], [4.8, 5], [4.9, 6], [5, 10], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.44167, 12], [6.6, 2], [6.7, 8], [6.8, 3], [6.92, 5], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "mean": 5.84333}, "column_number": 0, "order": 0}, "000001": {"optype": "numeric", "name": "sepal width", "datatype": "double", "preferred": true, "summary": {"sum_squares": 1430.4, "missing_count": 0, "sum": 458.6, "median": 3.02044, "maximum": 4.4, "minimum": 2, "standard_deviation": 0.43587, 
"variance": 0.18998, "counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "population": 150, "mean": 3.05733}, "column_number": 1, "order": 1}}, "model_fields": {"000004": {"optype": "categorical", "name": "species", "datatype": "string", "preferred": true, "column_number": 4, "order": 4}, "000002": {"optype": "numeric", "name": "petal length", "datatype": "double", "preferred": true, "column_number": 2, "order": 2}, "000003": {"optype": "numeric", "name": "petal width", "datatype": "double", "preferred": true, "column_number": 3, "order": 3}}, "support_threshold": 0, "split_criterion": "Information gain mix", "root": {"count": 150, "confidence": 0.26289, "predicate": true, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-setosa", 50], ["Iris-virginica", 50]]}, "output": "Iris-virginica", "children": [{"count": 50, "output": "Iris-setosa", "confidence": 0.92865, "predicate": {"operator": "<=", "field": "000002", "value": 2.45}, "objective_summary": {"categories": [["Iris-setosa", 50]]}}, {"count": 100, "confidence": 0.40383, "predicate": {"operator": ">", "field": "000002", "value": 2.45}, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-virginica", "children": [{"count": 48, "output": "Iris-virginica", "confidence": 0.86024, "predicate": {"operator": ">", "field": "000003", "value": 1.65}, "objective_summary": {"categories": [["Iris-versicolor", 2], ["Iris-virginica", 46]]}}, {"count": 52, "confidence": 0.81826, "predicate": {"operator": "<=", "field": "000003", "value": 1.65}, "objective_summary": {"categories": [["Iris-virginica", 4], ["Iris-versicolor", 48]]}, "output": "Iris-versicolor", "children": [{"count": 47, "output": "Iris-versicolor", "confidence": 0.92444, "predicate": {"operator": "<=", 
"field": "000002", "value": 4.95}, "objective_summary": {"categories": [["Iris-versicolor", 47]]}}, {"count": 5, "output": "Iris-virginica", "confidence": 0.37553, "predicate": {"operator": ">", "field": "000002", "value": 4.95}, "objective_summary": {"categories": [["Iris-versicolor", 1], ["Iris-virginica", 4]]}}]}]}]}, "depth_threshold": 20}, "replacement": false}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment