Skip to content

Instantly share code, notes, and snippets.

@ashenfad
Last active December 22, 2015 12:09
Show Gist options
  • Select an option

  • Save ashenfad/6470204 to your computer and use it in GitHub Desktop.

Select an option

Save ashenfad/6470204 to your computer and use it in GitHub Desktop.
BigML Tree - Iris (Icicle)

An icicle mapping of a BigML decision tree built on the iris dataset.

The top row represents the root of the tree. Each lower row contains the children of the nodes in the row above it. The number of training instances captured by a node determines its width.

<!DOCTYPE html>
<meta charset="utf-8">
<style>
/* Page frame: fixed 960px column, centred horizontally. */
body {
  font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
  margin: auto;
  position: relative;
  width: 960px;
  background: #fff;
}

/* Colour-mode radio buttons, pinned to the top-right corner. */
#color-controls {
  font: 14px sans-serif;
  position: absolute;
  right: 10px;
  top: 10px;
  padding: 3px;
}
#color-controls div {
  padding: 4px;
}

/* Details of the hovered node, pinned to the top-left corner. */
#hover-info {
  font: 14px sans-serif;
  position: absolute;
  left: 10px;
  top: 10px;
}

/* Accumulated path constraints for the hovered node, pinned bottom-left.
   The rule used to declare `font` twice (14px, then 12px); only the later
   12px declaration ever applied, so the dead 14px one has been dropped. */
#summary-info {
  position: absolute;
  left: 10px;
  bottom: 10px;
  font: 12px sans-serif;
}
#summary-info div {
  padding: 2px;
}

/* Heading line of the hover panel (the split predicate text). */
.split-predicate {
  font-weight: bold;
  border-bottom: 1px solid #DFDFDF;
  padding: 7px;
}

/* Key/value table shown beneath the split predicate. */
.node-info {
  margin-top: 10px;
}
.node-info td {
  padding: 2px 7px 2px;
}
</style>
<body>
<!-- Colour-mode selector. The script listens for "change" on every <input>
     and uses the input's value as the colouring mode. Each label is tied to
     its radio button through for/id so that clicking the text selects it. -->
<div id="color-controls">
  <form>
    <div>
      <input type="radio" name="mode" id="mode-prediction" value="prediction" checked>
      <label for="mode-prediction">Prediction</label>
    </div>
    <div>
      <input type="radio" name="mode" id="mode-confidence" value="confidence">
      <!-- id="cnf" is looked up by the script to relabel this option for regression trees. -->
      <label id="cnf" for="mode-confidence">Confidence</label>
    </div>
    <div>
      <input type="radio" name="mode" id="mode-split" value="split">
      <label for="mode-split">Split Field</label>
    </div>
  </form>
</div>
<!-- Filled on mouseover with the hovered node's predicate, prediction and count. -->
<div id="hover-info"></div>
<!-- Filled on mouseover with the combined constraints along the path to the node. -->
<div id="summary-info"></div>
</body>
<!-- Loaded over HTTPS so the library is not blocked as mixed content when the page itself is served securely. -->
<script src="https://d3js.org/d3.v3.min.js"></script>
<script>
// Pixel dimensions of the SVG drawing area; also used for the scale ranges
// and for the frame-height adjustment further down.
var width = 960,
height = 600;
/**
 * Picks the fill colour for a node, lightening it while the node is flagged
 * as hovered.
 *
 * @param {Object} d     Node datum; only its `hover` flag is read.
 * @param {*}      color Base colour for the node.
 * @returns {*} `color` untouched when `d.hover` is falsy, otherwise the
 *              result of `d3.rgb(color).brighter(0.66)`.
 */
function hover_adjust(d, color) {
  if (!d.hover) {
    return color;
  }
  var base = d3.rgb(color);
  return base.brighter(0.66);
}
// Root drawing surface: an <svg> of width x height appended to <body>;
// `svg` itself refers to the <g> group created inside it.
var svg = d3.select("body").append("svg")
.attr("width", width)
.attr("height", height)
.append("g");
// Partition layout in which each node's size comes from its `count` property.
var partition = d3.layout.partition().value(function(d) { return d.count; });
// Linear scales from layout coordinates to pixels. The ranges leave margins
// around the chart (20px left / 30px right, 120px top / 90px bottom).
// NOTE(review): no domain is set, so this relies on the scales' default
// domain matching the layout's output extent — confirm against the D3 docs.
var x = d3.scale.linear().range([20, width - 30])
var y = d3.scale.linear().range([120, height - 90])
/**
 * Recursively finds the smallest and largest value of one attribute across a
 * node and all of its descendants.
 *
 * @param {Object} node Tree node; descendants are read from `node.children`
 *                      when that property is present.
 * @param {string} attr Name of the node property to compare.
 * @returns {{min: number, max: number}} Extremes of `node[attr]` over the subtree.
 */
function find_minmax(node, attr) {
  // Leaf: the node's own value is both extremes.
  if (!node.children) {
    return {"min": node[attr], "max": node[attr]};
  }
  // These intermediates must be declared locally: as bare assignments they
  // created global variables, and threw a ReferenceError in strict mode.
  var child_ranges = node.children.map(function (n) { return find_minmax(n, attr); });
  var lowest = Math.min.apply(null, child_ranges.map(function (mm) { return mm.min; }));
  var highest = Math.max.apply(null, child_ranges.map(function (mm) { return mm.max; }));
  return {"min": Math.min(lowest, node[attr]), "max": Math.max(highest, node[attr])};
}
// Load the model JSON, then draw the icicle chart and wire up the colour-mode
// radio buttons and the hover read-outs.
d3.json("iris-model.json", function(error, root) {
  // Without this guard a failed request leaves `root` undefined and the next
  // statement dies with an uninformative TypeError.
  if (error) {
    console.error("Unable to load iris-model.json", error);
    return;
  }

  var model = root.model;
  // A categorical objective field means a classification tree; anything else
  // is treated as regression.
  var model_type = model.model_fields[root.objective_field].optype === "categorical" ?
      "classification" : "regression";

  // Value ranges over the whole tree, used as colour-scale domains.
  var minmaxs = {};
  if (model_type === "classification") {
    minmaxs.confidence = find_minmax(model.root, "confidence");
  } else {
    // Hacky label switch for regression trees
    document.getElementById("cnf").innerHTML = "Expected Error";
    // The expected-error range is read from the nodes' `confidence` attribute.
    minmaxs.expected_error = find_minmax(model.root, "confidence");
    minmaxs.output = find_minmax(model.root, "output");
  }

  // Prediction colour: one categorical colour per class, or a dark-to-cyan
  // ramp over the predicted value for regression.
  var scale_pred = model_type === "classification" ?
      d3.scale.category10() :
      d3.scale.linear().domain([minmaxs.output.min, minmaxs.output.max])
                       .range(["#222", "#2ee"]);
  // Confidence colour: red (low) to green (high). For regression the domain
  // is reversed so that the largest expected error maps to red.
  var scale_conf = model_type === "classification" ?
      d3.scale.linear().domain([minmaxs.confidence.min, minmaxs.confidence.max])
                       .range(["#d33", "#3d3"]) :
      d3.scale.linear().domain([minmaxs.expected_error.max, minmaxs.expected_error.min])
                       .range(["#d33", "#3d3"]);
  // Split-field colour: one categorical colour per field id.
  var scale_split = d3.scale.category20b();

  // Fill functions keyed by the `value` of the radio buttons.
  var color_lookup = {
    "prediction": function(d) { return hover_adjust(d, scale_pred(d.output)); },
    "confidence": function(d) { return hover_adjust(d, scale_conf(d.confidence)); },
    "split": function(d) { return hover_adjust(d, scale_split(d.predicate.field)); }
  };
  var color_fn = color_lookup["prediction"];

  // One rectangle per tree node. The group is empty at this point, so the
  // selection only serves as the anchor for the enter() join; it names "rect"
  // to match the elements that are actually appended.
  var path = svg.selectAll("rect")
      .data(partition.nodes(root.model.root))
    .enter().append("svg:rect")
      .attr("x", function(d) { return x(d.x); })
      .attr("y", function(d) { return y(d.y); })
      // x() includes the 20px start of its range; subtracting 21 removes that
      // offset and leaves a 1px gap between neighbouring rectangles.
      .attr("width", function(d) { return x(d.dx) - 21; })
      .attr("height", 96)
      .style("fill", color_fn)
      .on("mouseover", mouseover)
      .on("mouseout", mouseout);

  d3.selectAll("input").on("change", change);

  // Radio-button handler: switch the fill function and recolour with a short transition.
  function change() {
    color_fn = color_lookup[this.value];
    path.transition().duration(250).style("fill", color_fn);
  }

  // Hover handler: fill the info panels and highlight the node together with
  // all of its ancestors.
  function mouseover(d) {
    var split = d.predicate;
    // The root node's predicate is not an object with a `field`, so it falls
    // through to the "Tree Root" label.
    var split_msg = split.field ? model.model_fields[split.field].name + " " +
        split.operator + " " + split.value
        : "Tree Root";
    var conf_msg = {"classification": "Confidence", "regression": "Expected Error"};

    var hover = d3.select("#hover-info");
    hover.append("div").attr("class", "split-predicate").text(split_msg);
    // Declared locally; as a bare assignment this leaked a global.
    var tbody = hover.append("table").attr("class", "node-info").append("tbody");
    var output = model_type === "classification" ? d.output : parseFloat(d.output.toFixed(3));
    table_add(tbody, "Prediction", output);
    table_add(tbody, conf_msg[model_type], parseFloat(d.confidence.toFixed(3)));
    table_add(tbody, "Count", d.count);

    mark_hover(d, true);
    path.style("fill", color_fn);

    // One line per field constrained on the path from the root to this node.
    var summ_doc = d3.select("#summary-info");
    var summaries = summarize(d);
    for (var id in summaries) {
      if (!summaries.hasOwnProperty(id)) { continue; }
      var name = model.model_fields[id].name;
      var msg = name;
      var fs = summaries[id];
      if (isNum(fs.min)) {
        msg = parseFloat(fs.min.toFixed(3)) + " < " + msg;
      }
      if (isNum(fs.max)) {
        msg += " <= " + parseFloat(fs.max.toFixed(3));
      }
      if (fs.eq) {
        msg += " = " + fs.eq;
      } else if (fs.not_eq) {
        // Own keys only, joined with "|". The previous hand-rolled loop
        // emitted the separator before filtering out inherited keys.
        msg += " !=" + Object.keys(fs.not_eq).map(function (category) {
          return " " + category;
        }).join("|");
      }
      summ_doc.append("div").text(msg);
    }
  }

  // Hover-out handler: clear both info panels and remove the highlight.
  function mouseout(d) {
    d3.select("#hover-info").html("");
    d3.select("#summary-info").html("");
    mark_hover(d, false);
    path.style("fill", color_fn);
  }

  // Sets the `hover` flag on a node and on every ancestor reachable through `parent`.
  function mark_hover (d, val) {
    if (d.parent) { mark_hover(d.parent, val); }
    d.hover = val;
  }
});
// Set the CSS height of the element hosting this document (self.frameElement)
// to the chart height. NOTE(review): presumably meant for iframe embeds, where
// frameElement is the <iframe>; confirm the behaviour when the page is not framed.
d3.select(self.frameElement).style("height", height + "px");
/**
 * Tells whether a value is, or converts cleanly to, a finite number.
 *
 * @param {*} n Value to test.
 * @returns {boolean} true only when `parseFloat(n)` is not NaN and
 *                    `isFinite(n)` holds.
 */
function isNum(n) {
  var parsed = parseFloat(n);
  if (isNaN(parsed)) {
    return false;
  }
  return isFinite(n);
}
/**
 * Collapses the predicates on the path from the tree root down to `node` into
 * one constraint record per field.
 *
 * Each record may carry `min` (tightest ">" bound), `max` (tightest "<="
 * bound), `eq` (value of the most recent "=" test) and `not_eq` (an object
 * whose keys are the values excluded by "!=" tests).
 *
 * @param {Object} node Tree node with a `predicate` and, unless it is the
 *                      root, a `parent` link.
 * @returns {Object} Map from field id to constraint record; empty for a node
 *                   without a parent.
 */
function summarize (node) {
  var pred = node.predicate;
  // The root contributes no constraint of its own.
  if (!node.parent) {
    return {};
  }
  var summary = summarize(node.parent);
  var entry = summary[pred.field];
  switch (pred.operator) {
    case "<=":
      if (entry) {
        // Keep the tighter (smaller) upper bound. Assigned directly to the
        // record: the old bare `max = ...` created a global and threw in
        // strict mode.
        entry.max = isNum(entry.max) ? Math.min(pred.value, entry.max) : pred.value;
      } else {
        summary[pred.field] = {"max": pred.value};
      }
      break;
    case ">":
      if (entry) {
        // Keep the tighter (larger) lower bound; same fix as for `max` above.
        entry.min = isNum(entry.min) ? Math.max(pred.value, entry.min) : pred.value;
      } else {
        summary[pred.field] = {"min": pred.value};
      }
      break;
    case "=":
      // An equality test replaces whatever was recorded for the field so far.
      summary[pred.field] = {"eq": pred.value};
      break;
    case "!=":
      if (!entry) {
        entry = summary[pred.field] = {};
      }
      if (!entry.not_eq) {
        entry.not_eq = {};
      }
      entry.not_eq[pred.value] = true;
      break;
  }
  return summary;
}
/**
 * Appends a two-cell row (label, value) to a table section.
 *
 * @param {Object} table Selection-like object exposing `append`; the new
 *                       <tr> is appended to it.
 * @param {*} field Text for the first cell.
 * @param {*} val   Text for the second cell.
 * @returns {Object} Whatever `table.append("tr")` returned (the new row).
 */
function table_add (table, field, val) {
  var row = table.append("tr");
  // Cells are created left to right: label first, then value.
  [field, val].forEach(function (content) {
    row.append("td").text(content);
  });
  return row;
}
</script>
{"public_dataset": false, "code": 200, "locale": "en-US", "short_url": "", "image": "gallery/model.png", "job_type": 400, "private": true, "dataset": "dataset/514bd86f035d07049200003f", "fields_meta": {"count": 5, "total": 5, "limit": 1000, "offset": 0}, "white_box": false, "dataset_id": "514bd86f035d07049200003f", "randomize": false, "id": "514fb3d00c0b5e04c3000093", "number_of_predictions": 0, "category": 0, "modified_fields": {}, "rows": 150, "out_of_bag": false, "ordering": 0, "fields_to_show": [{"optype": "numeric", "is_objective": false, "name": "sepal length", "datatype_name": "Numeric", "tooltip": "Numeric", "stats": [["Minimum", "4.30"], ["Mean", "5.84"], ["Median", "5.78"], ["Maximum", "7.90"], ["Std dev", "0.83"]], "id": "000000", "css": "numeric type_label datatype-tooltip"}, {"optype": "numeric", "is_objective": false, "name": "sepal width", "datatype_name": "Numeric", "tooltip": "Numeric", "stats": [["Minimum", "2.00"], ["Mean", "3.06"], ["Median", "3.02"], ["Maximum", "4.40"], ["Std dev", "0.44"]], "id": "000001", "css": "numeric type_label datatype-tooltip"}, {"optype": "numeric", "is_objective": false, "name": "petal length", "datatype_name": "Numeric", "tooltip": "Numeric", "stats": [["Minimum", "1.00"], ["Mean", "3.76"], ["Median", "4.34"], ["Maximum", "6.90"], ["Std dev", "1.77"]], "id": "000002", "css": "numeric type_label datatype-tooltip"}, {"optype": "numeric", "is_objective": false, "name": "petal width", "datatype_name": "Numeric", "tooltip": "Numeric", "stats": [["Minimum", "0.10"], ["Mean", "1.20"], ["Median", "1.33"], ["Maximum", "2.50"], ["Std dev", "0.76"]], "id": "000003", "css": "numeric type_label datatype-tooltip"}, {"optype": "categorical", "is_objective": true, "name": "species", "datatype_name": "Categorical", "tooltip": "Categorical", "id": "000004", "css": "categorical type_label datatype-tooltip"}], "views": 0, "size": 4608, "range": [1, 150], "credits_per_prediction": 0.0, "source": "source/514bcf9d0c0b5e3fd100026b", 
"number_of_public_predictions": 0, "sample_rate": 1.0, "objective_fields": ["000004"], "user_name": "ashenfad", "columns": 5, "selective_pruning": false, "status": {"progress": 1.0, "message": "The model has been created", "code": 5, "elapsed": 89}, "updated": "2013-03-25T02:17:52.927000", "description": "", "tags": [], "price": 0.0, "excluded_fields": [], "cloned": false, "credits": 0.017578125, "stat_pruning": true, "objective_field": "000004", "clones": 0, "resource": "model/514fb3d00c0b5e04c3000093", "name": "Iris Model", "created": "Mon, 25 Mar 2013 02:17:52 +0000", "url": "", "dataset_status": true, "source_status": true, "number_of_evaluations": 0, "max_columns": 5, "max_rows": 150, "input_fields": ["000000", "000001", "000002", "000003"], "published": null, "source_id": "514bcf9d0c0b5e3fd100026b", "model": {"kind": "stree", "missing_strategy": "Last prediction", "importance": [["000002", 0.53556], ["000003", 0.46444], ["000000", 0], ["000001", 0]], "fields": {"000004": {"optype": "categorical", "name": "species", "datatype": "string", "preferred": true, "summary": {"missing_count": 0, "categories": [["Iris-versicolor", 50], ["Iris-setosa", 50], ["Iris-virginica", 50]]}, "column_number": 4, "order": 4}, "000002": {"optype": "numeric", "name": "petal length", "datatype": "double", "preferred": true, "summary": {"sum_squares": 2582.71, "splits": [1.25138, 1.32426, 1.37171, 1.40962, 1.44567, 1.48173, 1.51859, 1.56301, 1.6255, 1.74645, 3.23033, 3.675, 3.94203, 4.0469, 4.18243, 4.34142, 4.45309, 4.51823, 4.61771, 4.72566, 4.83445, 4.93363, 5.03807, 5.1064, 5.20938, 5.43979, 5.5744, 5.6646, 5.81496, 6.02913, 6.38125], "missing_count": 0, "sum": 563.7, "median": 4.34142, "maximum": 6.9, "minimum": 1, "standard_deviation": 1.7653, "variance": 3.11628, "population": 150, "bins": [[1, 1], [1.1, 1], [1.2, 2], [1.3, 7], [1.4, 13], [1.5, 13], [1.63636, 11], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], 
[4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.46, 5], [5.6, 6], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "mean": 3.758}, "column_number": 2, "order": 2}, "000003": {"optype": "numeric", "name": "petal width", "datatype": "double", "preferred": true, "summary": {"sum_squares": 302.33, "missing_count": 0, "sum": 179.9, "median": 1.32848, "maximum": 2.5, "minimum": 0.1, "standard_deviation": 0.76224, "variance": 0.58101, "counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "population": 150, "mean": 1.19933}, "column_number": 3, "order": 3}, "000000": {"optype": "numeric", "name": "sepal length", "datatype": "double", "preferred": true, "summary": {"sum_squares": 5223.85, "splits": [4.51526, 4.67252, 4.81113, 4.89582, 4.96139, 5.01131, 5.05992, 5.11148, 5.18177, 5.35681, 5.44129, 5.5108, 5.58255, 5.65532, 5.71658, 5.77889, 5.85381, 5.97078, 6.05104, 6.13074, 6.23023, 6.29578, 6.35078, 6.41459, 6.49383, 6.63013, 6.70719, 6.79218, 6.92597, 7.20423, 7.64746], "missing_count": 0, "sum": 876.5, "median": 5.77889, "maximum": 7.9, "minimum": 4.3, "standard_deviation": 0.82807, "variance": 0.68569, "population": 150, "bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.7, 2], [4.8, 5], [4.9, 6], [5, 10], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.44167, 12], [6.6, 2], [6.7, 8], [6.8, 3], [6.92, 5], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "mean": 5.84333}, "column_number": 0, "order": 0}, "000001": {"optype": "numeric", "name": "sepal width", "datatype": "double", "preferred": true, "summary": {"sum_squares": 1430.4, "missing_count": 0, "sum": 458.6, "median": 3.02044, "maximum": 4.4, "minimum": 2, "standard_deviation": 0.43587, 
"variance": 0.18998, "counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "population": 150, "mean": 3.05733}, "column_number": 1, "order": 1}}, "model_fields": {"000004": {"optype": "categorical", "name": "species", "datatype": "string", "preferred": true, "column_number": 4, "order": 4}, "000002": {"optype": "numeric", "name": "petal length", "datatype": "double", "preferred": true, "column_number": 2, "order": 2}, "000003": {"optype": "numeric", "name": "petal width", "datatype": "double", "preferred": true, "column_number": 3, "order": 3}}, "support_threshold": 0, "split_criterion": "Information gain mix", "root": {"count": 150, "confidence": 0.26289, "predicate": true, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-setosa", 50], ["Iris-virginica", 50]]}, "output": "Iris-virginica", "children": [{"count": 50, "output": "Iris-setosa", "confidence": 0.92865, "predicate": {"operator": "<=", "field": "000002", "value": 2.45}, "objective_summary": {"categories": [["Iris-setosa", 50]]}}, {"count": 100, "confidence": 0.40383, "predicate": {"operator": ">", "field": "000002", "value": 2.45}, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-virginica", "children": [{"count": 48, "output": "Iris-virginica", "confidence": 0.86024, "predicate": {"operator": ">", "field": "000003", "value": 1.65}, "objective_summary": {"categories": [["Iris-versicolor", 2], ["Iris-virginica", 46]]}}, {"count": 52, "confidence": 0.81826, "predicate": {"operator": "<=", "field": "000003", "value": 1.65}, "objective_summary": {"categories": [["Iris-virginica", 4], ["Iris-versicolor", 48]]}, "output": "Iris-versicolor", "children": [{"count": 47, "output": "Iris-versicolor", "confidence": 0.92444, "predicate": {"operator": "<=", 
"field": "000002", "value": 4.95}, "objective_summary": {"categories": [["Iris-versicolor", 47]]}}, {"count": 5, "output": "Iris-virginica", "confidence": 0.37553, "predicate": {"operator": ">", "field": "000002", "value": 4.95}, "objective_summary": {"categories": [["Iris-versicolor", 1], ["Iris-virginica", 4]]}}]}]}]}, "depth_threshold": 20}, "replacement": false}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment