Skip to content

Instantly share code, notes, and snippets.

@feyderm
Last active January 21, 2017 16:48
Show Gist options
  • Save feyderm/e75050449e1c11426d1bb18b9cb4e96a to your computer and use it in GitHub Desktop.
Save feyderm/e75050449e1c11426d1bb18b9cb4e96a to your computer and use it in GitHub Desktop.
z-score to jitterplot

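Click a variable's row of z-scores to expand it into a jitterplot of its raw values (click the jitterplot area again to collapse it), then mouse over a point to see the car model.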

Data is from R's built-in mtcars dataset.

<!DOCTYPE html>
<head>
<meta charset="utf-8">
<style>
/* Styles for the z-score (SD) plot, its jitterplot expansion, and the tooltip. */
/* every SVG text element */
text {
font-family: sans-serif;
}
/* label appended to the z-score axis group */
#z_score_label {
fill: #000000;
}
/* title shown above the jitterplot while a variable is expanded */
/* NOTE(review): text-align does not apply to SVG <text>; text-anchor may have been intended - confirm */
#jitter_plot_label {
fill: #000000;
text-align: center;
}
/* vertical reference lines (mean / SD); pointer-events: none lets mouse events pass through to the rects */
.annotate_line {
opacity: 0.2;
stroke-width: 2;
pointer-events: none;
}
.annotate_text {
font-size: 0.75em;
}
/* SD threshold annotation is drawn in red */
#SD_annotation line {
stroke: #ff0000;
}
#SD_annotation text {
fill: #ff0000;
}
/* mean annotation is drawn in black */
#mean_annotation line {
stroke: #000000;
}
#mean_annotation text {
fill: #000000;
}
/* data rects whose datum is flagged _outlier */
.outlier {
fill: #ff0000;
stroke: #800000;
opacity: 1.0;
}
/* all other data rects: grey and mostly transparent */
.non-outlier {
fill: #737373;
stroke: none;
opacity: 0.3;
}
/* class toggled on data rects while they are shown as jitterplot points */
.jittered_d {
opacity: 0.8;
}
/* invisible overlay rect per variable that receives the mouse events */
.rect_g {
opacity: 0.0;
}
/*ref: http://bl.ocks.org/d3noob/a22c42db65eb00d4e369*/
/* absolutely positioned tooltip div; pointer-events: none keeps it from capturing the mouse */
#tooltip {
position: absolute;
text-align: center;
padding: 2px;
font-size: 0.8em;
font-family: sans-serif;
background: lightsteelblue;
border: 0px;
border-radius: 8px;
pointer-events: none;
}
</style>
<script src="https://d3js.org/d3.v4.min.js"></script>
<body>
<div id="block"></div>
<script type="text/javascript">
// ---- page-level state shared by every function in this script ----
// (declared with var so they stay script-level globals)

// overall SVG size and the size of the SD (z-score) plot on its left side, in px
var svg_dx = 600;
var svg_dy = 350;
var sd_plot_dx = 300;
var sd_plot_dy = 350;

// SD plot margins, expressed as fractions of the SD plot size
var margin_sd_plot = {
  top: sd_plot_dy * 0.08,
  bottom: sd_plot_dy * 0.22,
  left: sd_plot_dx * 0.10,
  right: sd_plot_dx * 0.10
};

// x position used for the jitterplot title and to size the jitter
var jitter_plot_x_midline = sd_plot_dx + 50;

// true while one variable is expanded into a jitterplot
var is_jitter_plot_visible = false;

// uniform random horizontal offset, +/- 10% of the midline position
var xJitter = d3.randomUniform(
  -jitter_plot_x_midline * 0.10,
  jitter_plot_x_midline * 0.10
);

// number of standard deviations from the mean at which a value is an outlier
var sd_mult = 2.0;

// root SVG element
var svg = d3.select("body")
  .append("svg")
  .attr("height", svg_dy)
  .attr("width", svg_dx);

// group holding the SD plot
var sd_plot = svg.append("g")
  .attr("id", "sd_plot");

// tooltip div, created fully transparent
var tooltip = d3.select("body")
  .append("div")
  .attr("id", "tooltip")
  .style("opacity", 0);
// Load the melted data and draw the SD (z-score) plot: one row of rects per
// variable, an invisible overlay rect per row for tooltips/clicks, the x-axis,
// and the mean / SD reference lines.
// d3 v4 invokes a two-argument callback as (error, rows).
d3.csv("mtcars_melted.csv", (error, rows) => {
  // stop here rather than throwing inside d3.nest() when the load failed
  if (error || !rows || rows.length === 0) {
    console.error("Could not load mtcars_melted.csv", error);
    return;
  }

  // group melted data by variable
  const d_grouped = d3.nest()
    .key(row => row.variable)
    .entries(rows);

  // stats by variable
  calcStatsByVar(d_grouped);

  // sort variables by max SD
  sortBySd(d_grouped);

  // initially all variables in SD plot and not expanded to jitterplot
  d_grouped.forEach(v => { v._jittered = false; });

  // max and min SD dist for x-axis
  const sd_dist = {
    max: d3.max(d_grouped, v => v._extentSD[1]),
    min: d3.min(d_grouped, v => v._extentSD[0])
  };

  // SD plot x scale and axis
  const xScale = d3.scaleLinear()
    .domain([sd_dist.min, sd_dist.max])
    .range([margin_sd_plot.left, sd_plot_dx - margin_sd_plot.right]);
  const xAxis = d3.axisBottom(xScale);

  // SD plot y scale: one band position per variable
  const yScale = d3.scalePoint()
    .domain(d_grouped.map(v => v.key))
    .range([margin_sd_plot.top, sd_plot_dy - margin_sd_plot.bottom]);

  // height shared by data rects and overlay rects
  const row_height = yScale.step() * 0.75;

  // group elements by variable
  const vars = sd_plot.selectAll("g")
    .data(d_grouped)
    .enter()
    .append("g")
    .attr("class", "variable");

  // for each variable, plot rects for each datum
  // note: arrow function does not bind 'this', so a regular function is used
  vars.each(function(v) {
    d3.select(this)
      .selectAll("rect")
      .data(v.values)
      .enter()
      .append("rect")
      .attr("x", datum => xScale(+datum._sdMult))
      .attr("y", () => yScale(v.key))
      .attr("width", 5)
      .attr("height", row_height)
      .attr("class", datum => datum._outlier ? "outlier rect_data" : "non-outlier rect_data")
      .on("mouseover", datum => displayTooltip(datum.model))
      .on("mouseout", hideTooltip);
  });

  // overlay grouping rect
  // note: rect needed because in Chrome event listeners to g element
  // are bound to constitutive elements and not entire g rect
  vars.append("rect")
    .attr("class", "rect_g")
    .attr("x", margin_sd_plot.left)
    .attr("y", v => yScale(v.key))
    .attr("width", sd_plot_dx - margin_sd_plot.right - margin_sd_plot.left)
    .attr("height", row_height)
    .on("mouseover", v => displayTooltip(v.key))
    .on("mousemove", v => displayTooltip(v.key))
    .on("mouseout", hideTooltip)
    .on("click", function(v) {
      // only one variable may be expanded at a time; clicking the expanded
      // variable's rect collapses it again
      if (!is_jitter_plot_visible && v._jittered === false) {
        expandToJitterplot(this, v);
        v._jittered = true;
        is_jitter_plot_visible = true;
      } else if (is_jitter_plot_visible && v._jittered === true) {
        collapseToSdPlot(this, v, xScale, yScale);
        v._jittered = false;
        is_jitter_plot_visible = false;
      }
    });

  // plot x-axis
  sd_plot.append("g")
    .attr("id", "z_score_axis")
    .call(xAxis);

  // x-axis label
  d3.select("#z_score_axis")
    .append("text")
    .attr("id", "z_score_label")
    .text("z-score")
    .attr("transform", "translate(20, 15)");

  // plot SD line
  sd_plot.append("g")
    .attr("id", "SD_annotation")
    .attr("transform", "translate(" + xScale(sd_mult) + ",0)")
    .call(addLine, "SD_line", "SD_label", sd_mult + " SD");

  // plot mean line (z-score of 0)
  sd_plot.append("g")
    .attr("id", "mean_annotation")
    .attr("transform", "translate(" + xScale(0) + ",0)")
    .call(addLine, "mean_line", "mean_label", "mean");
});
/**
 * Collapse an expanded jitterplot back into its row of the SD plot.
 *
 * @param {Element} rect_g - the variable's overlay rect (the element that was clicked)
 * @param {Object} d - the variable's group datum; d.key selects its row in yScale
 * @param {Function} xScale - SD plot x scale (z-score -> px)
 * @param {Function} yScale - SD plot point scale (variable key -> px); step() sizes the rows
 */
function collapseToSdPlot(rect_g, d, xScale, yScale) {
  // remove toolTip
  hideTooltip();

  const row_height = yScale.step() * 0.75;

  // revert data rects
  d3.select(rect_g.parentNode)
    .selectAll(".rect_data")
    .classed("jittered_d", false)
    .transition()
    .duration(500)
    .attr("x", datum => xScale(+datum._sdMult))
    .attr("y", () => yScale(d.key))
    .attr("width", 5)
    .attr("height", row_height)
    .attr("rx", 0)
    .attr("ry", 0)
    // undo the centering offset applied by expandToJitterplot; without this
    // the bars stay shifted upward after a collapse
    .attr("transform", "translate(0,0)");

  // revert grouping rect, re-apply tooltip listeners, and raise it
  // so it masks the data rects' mouseover events again
  d3.select(rect_g)
    .classed("jittered", false)
    .attr("x", margin_sd_plot.left)
    .attr("y", v => yScale(v.key))
    .attr("width", sd_plot_dx - margin_sd_plot.right - margin_sd_plot.left)
    .attr("height", row_height)
    .on("mouseover", v => displayTooltip(v.key))
    .on("mousemove", v => displayTooltip(v.key))
    .on("mouseout", hideTooltip)
    .raise();

  // remove x-axis of jitterplot
  d3.select("#jitter_plot_axis")
    .remove();

  // remove jitterplot title
  d3.select("#jitter_plot_label")
    .remove();
}
/**
 * Expand one variable's row of the SD plot into a jitterplot of its raw values.
 *
 * @param {Element} rect_g - the variable's overlay rect (the element that was clicked)
 * @param {Object} d - the variable's group datum; d.values holds the rows, d.key the title
 */
function expandToJitterplot(rect_g, d) {
  // min and max of variable values
  const [value_min, value_max] = d3.extent(d.values, datum => +datum.value);

  // extend range by 5% for aesthetics; Math.abs keeps the padding pointing
  // outward when a bound is negative
  const pad = 0.05;
  const d_extent_plus = {
    min: value_min - Math.abs(value_min) * pad,
    max: value_max + Math.abs(value_max) * pad
  };

  // y scale for jitterplot
  const yScaleJittered = d3.scaleLinear()
    .domain([d_extent_plus.min, d_extent_plus.max])
    .range([svg_dy - margin_sd_plot.bottom, margin_sd_plot.top]);

  // y axis for jitterplot
  const yAxisJittered = d3.axisLeft(yScaleJittered);

  // remove tooltip of variable names
  hideTooltip();

  // circle dimensions
  const dim = 8;

  // jitterplot transition: rects become rounded points placed by raw value
  d3.select(rect_g.parentNode)
    .selectAll(".rect_data")
    .classed("jittered_d", true)
    .transition()
    .duration(500)
    .attr("height", dim)
    .attr("width", dim)
    .attr("rx", dim)
    .attr("ry", dim)
    .attr("x", () => sd_plot_dx + 100 + xJitter())
    .attr("y", datum => yScaleJittered(+datum.value))
    .attr("transform", `translate(0,${-(dim / 2)})`); // centers rect

  // stretch grouping rect over the jitterplot area, remove its tooltip
  // listeners, and lower it so the points receive mouseover events
  d3.select(rect_g)
    .classed("jittered", true)
    .attr("x", sd_plot_dx)
    .attr("y", 0)
    .attr("width", svg_dx - sd_plot_dx)
    .attr("height", svg_dy)
    .on("mouseover", null)
    .on("mousemove", null)
    .on("mouseout", null)
    .lower();

  // add y-axis (kept in a local; the original leaked it as an implicit global)
  const jitter_axis = d3.select("svg")
    .append("g")
    .attr("id", "jitter_plot_axis")
    .call(yAxisJittered);
  jitter_axis.attr("transform", `translate(${sd_plot_dx + 50}, 0)`);

  // add jitterplot title
  d3.select(rect_g.parentNode)
    .append("text")
    .attr("id", "jitter_plot_label")
    .text(d.key)
    .attr("transform", `translate(${jitter_plot_x_midline},17)`);
}
/**
 * Show the tooltip next to the mouse pointer.
 *
 * @param {string} d - markup/text to place inside the tooltip
 */
function displayTooltip(d) {
  // set the content first, then position relative to the current d3 event
  const tip = tooltip.html(d);
  const left = `${d3.event.pageX + 8}px`;
  const top = `${d3.event.pageY - 20}px`;

  tip.style("left", left)
    .style("top", top)
    .style("opacity", 0.9);
}
/** Hide the tooltip by making it fully transparent (it stays in the DOM). */
function hideTooltip() {
  const hidden_opacity = 0;
  tooltip.style("opacity", hidden_opacity);
}
/**
 * Append a vertical annotation line and a rotated text label to a selection.
 * Intended for use with selection.call(addLine, ...).
 *
 * @param {Object} selection - d3 selection to append to
 * @param {string} line_ID - id attribute for the line
 * @param {string} text_ID - id attribute for the label
 * @param {string} text - label content
 */
function addLine(selection, line_ID, text_ID, text) {
  // vertical line from the top margin down to the bottom of the SD plot
  const line = selection.append("line");
  line.attr("class", "annotate_line")
    .attr("id", line_ID)
    .attr("x1", 0)
    .attr("y1", margin_sd_plot.top)
    .attr("x2", 0)
    .attr("y2", sd_plot_dy);

  // label anchored at the bottom of the line and rotated to read upward
  const label = selection.append("text");
  label.text(text)
    .attr("class", "annotate_text")
    .attr("id", text_ID)
    .attr("transform", `translate(-4,${sd_plot_dy}) rotate(270)`);
}
/**
 * Decide whether a value lies at least sd_mult standard deviations from its group mean.
 *
 * @param {number} value - the value to test
 * @param {Object} group - group stats; reads group._mean and group._sd
 * @returns {boolean} true when value is on or beyond either threshold
 */
function isOutlier(value, group) {
  const is_gt_eq_thres = value >= group._mean + (sd_mult * group._sd);
  const is_lt_eq_thres = value <= group._mean - (sd_mult * group._sd);

  // logical OR (not bitwise |) so the result is a real boolean
  return is_gt_eq_thres || is_lt_eq_thres;
}
/**
 * Annotate every variable group, and every row in it, with summary statistics.
 *
 * Written on each group: _sd, _mean, _hasOutlier, _extentSD ([min, max] z-score).
 * Written on each row:   _outlier (boolean), _sdMult (z-score).
 *
 * @param {Array<Object>} d_grouped - groups, each with a `values` array of rows
 *   carrying a `value` field
 */
function calcStatsByVar(d_grouped) {
  const asNumber = row => +row.value;

  for (const group of d_grouped) {
    const rows = group.values;

    // standard deviation and mean of the variable
    group._sd = d3.deviation(rows, asNumber);
    group._mean = d3.mean(rows, asNumber);

    for (const row of rows) {
      // flag outliers
      row._outlier = isOutlier(+row.value, group);
      // multiple of SD from mean (+ / -)
      // NOTE(review): not finite when _sd is 0 or undefined - confirm inputs always have spread
      row._sdMult = (row.value - group._mean) / group._sd;
    }

    // flag variables having at least one outlier
    group._hasOutlier = rows.some(row => row._outlier);

    // min and max of SD multiples
    group._extentSD = d3.extent(rows, row => +row._sdMult);
  }
}
/**
 * Sort groups in place by their largest SD multiple (the upper bound of
 * _extentSD), largest first.
 *
 * @param {Array<Object>} d_grouped - groups carrying an _extentSD [min, max] pair
 */
function sortBySd(d_grouped) {
  const maxSdOf = group => group._extentSD[1];
  const byMaxSdDescending = (left, right) => d3.descending(maxSdOf(left), maxSdOf(right));

  d_grouped.sort(byMaxSdDescending);
}
</script>
</body>
model variable value
Mazda RX4 mile per US gallon 21
Mazda RX4 Wag mile per US gallon 21
Datsun 710 mile per US gallon 22.8
Hornet 4 Drive mile per US gallon 21.4
Hornet Sportabout mile per US gallon 18.7
Valiant mile per US gallon 18.1
Duster 360 mile per US gallon 14.3
Merc 240D mile per US gallon 24.4
Merc 230 mile per US gallon 22.8
Merc 280 mile per US gallon 19.2
Merc 280C mile per US gallon 17.8
Merc 450SE mile per US gallon 16.4
Merc 450SL mile per US gallon 17.3
Merc 450SLC mile per US gallon 15.2
Cadillac Fleetwood mile per US gallon 10.4
Lincoln Continental mile per US gallon 10.4
Chrysler Imperial mile per US gallon 14.7
Fiat 128 mile per US gallon 32.4
Honda Civic mile per US gallon 30.4
Toyota Corolla mile per US gallon 33.9
Toyota Corona mile per US gallon 21.5
Dodge Challenger mile per US gallon 15.5
AMC Javelin mile per US gallon 15.2
Camaro Z28 mile per US gallon 13.3
Pontiac Firebird mile per US gallon 19.2
Fiat X1-9 mile per US gallon 27.3
Porsche 914-2 mile per US gallon 26
Lotus Europa mile per US gallon 30.4
Ford Pantera L mile per US gallon 15.8
Ferrari Dino mile per US gallon 19.7
Maserati Bora mile per US gallon 15
Volvo 142E mile per US gallon 21.4
Mazda RX4 displacement (cu. in.) 160
Mazda RX4 Wag displacement (cu. in.) 160
Datsun 710 displacement (cu. in.) 108
Hornet 4 Drive displacement (cu. in.) 258
Hornet Sportabout displacement (cu. in.) 360
Valiant displacement (cu. in.) 225
Duster 360 displacement (cu. in.) 360
Merc 240D displacement (cu. in.) 146.7
Merc 230 displacement (cu. in.) 140.8
Merc 280 displacement (cu. in.) 167.6
Merc 280C displacement (cu. in.) 167.6
Merc 450SE displacement (cu. in.) 275.8
Merc 450SL displacement (cu. in.) 275.8
Merc 450SLC displacement (cu. in.) 275.8
Cadillac Fleetwood displacement (cu. in.) 472
Lincoln Continental displacement (cu. in.) 460
Chrysler Imperial displacement (cu. in.) 440
Fiat 128 displacement (cu. in.) 78.7
Honda Civic displacement (cu. in.) 75.7
Toyota Corolla displacement (cu. in.) 71.1
Toyota Corona displacement (cu. in.) 120.1
Dodge Challenger displacement (cu. in.) 318
AMC Javelin displacement (cu. in.) 304
Camaro Z28 displacement (cu. in.) 350
Pontiac Firebird displacement (cu. in.) 400
Fiat X1-9 displacement (cu. in.) 79
Porsche 914-2 displacement (cu. in.) 120.3
Lotus Europa displacement (cu. in.) 95.1
Ford Pantera L displacement (cu. in.) 351
Ferrari Dino displacement (cu. in.) 145
Maserati Bora displacement (cu. in.) 301
Volvo 142E displacement (cu. in.) 121
Mazda RX4 gross horsepower 110
Mazda RX4 Wag gross horsepower 110
Datsun 710 gross horsepower 93
Hornet 4 Drive gross horsepower 110
Hornet Sportabout gross horsepower 175
Valiant gross horsepower 105
Duster 360 gross horsepower 245
Merc 240D gross horsepower 62
Merc 230 gross horsepower 95
Merc 280 gross horsepower 123
Merc 280C gross horsepower 123
Merc 450SE gross horsepower 180
Merc 450SL gross horsepower 180
Merc 450SLC gross horsepower 180
Cadillac Fleetwood gross horsepower 205
Lincoln Continental gross horsepower 215
Chrysler Imperial gross horsepower 230
Fiat 128 gross horsepower 66
Honda Civic gross horsepower 52
Toyota Corolla gross horsepower 65
Toyota Corona gross horsepower 97
Dodge Challenger gross horsepower 150
AMC Javelin gross horsepower 150
Camaro Z28 gross horsepower 245
Pontiac Firebird gross horsepower 175
Fiat X1-9 gross horsepower 66
Porsche 914-2 gross horsepower 91
Lotus Europa gross horsepower 113
Ford Pantera L gross horsepower 264
Ferrari Dino gross horsepower 175
Maserati Bora gross horsepower 335
Volvo 142E gross horsepower 109
Mazda RX4 rear axle ratio 3.9
Mazda RX4 Wag rear axle ratio 3.9
Datsun 710 rear axle ratio 3.85
Hornet 4 Drive rear axle ratio 3.08
Hornet Sportabout rear axle ratio 3.15
Valiant rear axle ratio 2.76
Duster 360 rear axle ratio 3.21
Merc 240D rear axle ratio 3.69
Merc 230 rear axle ratio 3.92
Merc 280 rear axle ratio 3.92
Merc 280C rear axle ratio 3.92
Merc 450SE rear axle ratio 3.07
Merc 450SL rear axle ratio 3.07
Merc 450SLC rear axle ratio 3.07
Cadillac Fleetwood rear axle ratio 2.93
Lincoln Continental rear axle ratio 3
Chrysler Imperial rear axle ratio 3.23
Fiat 128 rear axle ratio 4.08
Honda Civic rear axle ratio 4.93
Toyota Corolla rear axle ratio 4.22
Toyota Corona rear axle ratio 3.7
Dodge Challenger rear axle ratio 2.76
AMC Javelin rear axle ratio 3.15
Camaro Z28 rear axle ratio 3.73
Pontiac Firebird rear axle ratio 3.08
Fiat X1-9 rear axle ratio 4.08
Porsche 914-2 rear axle ratio 4.43
Lotus Europa rear axle ratio 3.77
Ford Pantera L rear axle ratio 4.22
Ferrari Dino rear axle ratio 3.62
Maserati Bora rear axle ratio 3.54
Volvo 142E rear axle ratio 4.11
Mazda RX4 weight (x 1000 lbs) 2.62
Mazda RX4 Wag weight (x 1000 lbs) 2.875
Datsun 710 weight (x 1000 lbs) 2.32
Hornet 4 Drive weight (x 1000 lbs) 3.215
Hornet Sportabout weight (x 1000 lbs) 3.44
Valiant weight (x 1000 lbs) 3.46
Duster 360 weight (x 1000 lbs) 3.57
Merc 240D weight (x 1000 lbs) 3.19
Merc 230 weight (x 1000 lbs) 3.15
Merc 280 weight (x 1000 lbs) 3.44
Merc 280C weight (x 1000 lbs) 3.44
Merc 450SE weight (x 1000 lbs) 4.07
Merc 450SL weight (x 1000 lbs) 3.73
Merc 450SLC weight (x 1000 lbs) 3.78
Cadillac Fleetwood weight (x 1000 lbs) 5.25
Lincoln Continental weight (x 1000 lbs) 5.424
Chrysler Imperial weight (x 1000 lbs) 5.345
Fiat 128 weight (x 1000 lbs) 2.2
Honda Civic weight (x 1000 lbs) 1.615
Toyota Corolla weight (x 1000 lbs) 1.835
Toyota Corona weight (x 1000 lbs) 2.465
Dodge Challenger weight (x 1000 lbs) 3.52
AMC Javelin weight (x 1000 lbs) 3.435
Camaro Z28 weight (x 1000 lbs) 3.84
Pontiac Firebird weight (x 1000 lbs) 3.845
Fiat X1-9 weight (x 1000 lbs) 1.935
Porsche 914-2 weight (x 1000 lbs) 2.14
Lotus Europa weight (x 1000 lbs) 1.513
Ford Pantera L weight (x 1000 lbs) 3.17
Ferrari Dino weight (x 1000 lbs) 2.77
Maserati Bora weight (x 1000 lbs) 3.57
Volvo 142E weight (x 1000 lbs) 2.78
Mazda RX4 1/4 mile time (sec.) 16.46
Mazda RX4 Wag 1/4 mile time (sec.) 17.02
Datsun 710 1/4 mile time (sec.) 18.61
Hornet 4 Drive 1/4 mile time (sec.) 19.44
Hornet Sportabout 1/4 mile time (sec.) 17.02
Valiant 1/4 mile time (sec.) 20.22
Duster 360 1/4 mile time (sec.) 15.84
Merc 240D 1/4 mile time (sec.) 20
Merc 230 1/4 mile time (sec.) 22.9
Merc 280 1/4 mile time (sec.) 18.3
Merc 280C 1/4 mile time (sec.) 18.9
Merc 450SE 1/4 mile time (sec.) 17.4
Merc 450SL 1/4 mile time (sec.) 17.6
Merc 450SLC 1/4 mile time (sec.) 18
Cadillac Fleetwood 1/4 mile time (sec.) 17.98
Lincoln Continental 1/4 mile time (sec.) 17.82
Chrysler Imperial 1/4 mile time (sec.) 17.42
Fiat 128 1/4 mile time (sec.) 19.47
Honda Civic 1/4 mile time (sec.) 18.52
Toyota Corolla 1/4 mile time (sec.) 19.9
Toyota Corona 1/4 mile time (sec.) 20.01
Dodge Challenger 1/4 mile time (sec.) 16.87
AMC Javelin 1/4 mile time (sec.) 17.3
Camaro Z28 1/4 mile time (sec.) 15.41
Pontiac Firebird 1/4 mile time (sec.) 17.05
Fiat X1-9 1/4 mile time (sec.) 18.9
Porsche 914-2 1/4 mile time (sec.) 16.7
Lotus Europa 1/4 mile time (sec.) 16.9
Ford Pantera L 1/4 mile time (sec.) 14.5
Ferrari Dino 1/4 mile time (sec.) 15.5
Maserati Bora 1/4 mile time (sec.) 14.6
Volvo 142E 1/4 mile time (sec.) 18.6
library(magrittr)
library(reshape2)
library(dplyr)
# Reshape mtcars to long ("melted") form: one row per model / variable pair.
# Grouping by variable is done later in the browser with d3.nest().
melted <- mtcars %>%
  cbind(model = row.names(.), .) %>%
  dplyr::select(model, mpg, disp, hp, drat, wt, qsec) %>%
  reshape2::melt()

# Full variable names, keyed by the abbreviated column name.
# The substitutions run in this order, one gsub() per entry.
full_names <- c(
  mpg  = "mile per US gallon",
  disp = "displacement (cu. in.)",
  hp   = "gross horsepower",
  drat = "rear axle ratio",
  wt   = "weight (x 1000 lbs)",
  qsec = "1/4 mile time (sec.)"
)
for (abbr in names(full_names)) {
  melted$variable <- gsub(pattern = abbr,
                          replacement = full_names[[abbr]],
                          x = melted$variable)
}

# Write the file loaded by the page, without a row-name column.
write.csv(melted, "mtcars_melted.csv", row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment