Skip to content

Instantly share code, notes, and snippets.

@mgoold
Last active September 24, 2016 16:51
Show Gist options
  • Save mgoold/88f6f798dac245b3261ed1a543b04415 to your computer and use it in GitHub Desktop.
Save mgoold/88f6f798dac245b3261ed1a543b04415 to your computer and use it in GitHub Desktop.
An exploration in how to visualize event co-occurrence.

This is a visual exploration of event co-occurrence that I really like; it's still a work in progress. A common example of co-occurrence data would be the events that users do during the same visit. To make it I cannibalized this gist: http://bl.ocks.org/gencay/4629518 .

The notion here is that any colors placed in the same horizontal plane are co-occurring. The vertical height indicates the volume of users, visits, or whatever, that did/had the events together. For example, you might use this viz to answer the question "how many visitors search, buy, and message in the same visit?". Using a sort, whatever things co-occur most often will pop right out at the bottom, and the volume rank of co-occurrence is immediately visible.

Some challenges/next things to address with this viz:

  1. I had to take away the y axis in favor of left-side callouts for volume numbers. I did this because of the need to separate the blocks with whitespace for greater clarity, making a y axis inaccurate (it makes no sense to sum over the white spaces). Some additional mark will be needed for the callouts.
  2. As illustrated here, it doesn't play well with change over time (or multiple categories), requiring too much left-right scanning. For a comparison of 2 periods it may work to only show the latest period, and denote change in the callout with additional text, e.g. "100 Users, down 20% from 120 in Jan 2016". In that case, combinations found in the previous period would need to be inserted into the stack, with some visual signal that they belong to the previous time interval. This kind of combination wouldn't really work for different categories (e.g. trial users vs subscribers).
{"headermapping": {"primarydimension": "group:", "values": ["value"], "groups": ["month"]}, "data": [{"group": "A--B--C--D--E", "value": "40", "month": "1/1/16"}, {"group": "A--B", "value": "10", "month": "1/1/16"}, {"group": "C", "value": "15", "month": "1/1/16"}, {"group": "D", "value": "10", "month": "1/1/16"}, {"group": "B--D", "value": "5", "month": "1/1/16"}, {"group": "A--B--C--D", "value": "45", "month": "2/1/16"}, {"group": "A--B", "value": "15", "month": "2/1/16"}, {"group": "C", "value": "10", "month": "2/1/16"}, {"group": "D", "value": "10", "month": "2/1/16"}, {"group": "B--D", "value": "10", "month": "2/1/16"}], "maxcatstring": "A--B--C--D--E"}
<!DOCTYPE html>
<html>
<meta charset="utf-8">
<head>
</head>
<!-- <script type="text/javascript" src="d3.min.js" charset="utf-8"></script> -->
<body>
<script src="https://d3js.org/d3.v4.js"></script>
<!--<div id="chart" style="float:left"></div>-->
<div id="d3space"></div>
<script type="text/javascript">
// Chart geometry: the drawing area is the 980x400 outer box minus the margins.
// Each name gets its own declaration; previously `height` followed a
// terminating `;` and silently became an implicit global.
const margin = {top: 10, right: 30, bottom: 30, left: 10};
const width = 980 - (margin.left + margin.right);
const height = 400 - (margin.top + margin.bottom);

// Root <svg> sized to the outer box; `svg` refers to the inner <g> that is
// shifted by the top/left margins, so everything appended to it is drawn in
// drawing-area coordinates.
const svg = d3.select("#d3space").append("svg")
  .attr("width", width + margin.left + margin.right)
  .attr("height", height + margin.top + margin.bottom)
  .append("g")
  .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// x0 lays out the outer columns across the full width; its domain is set
// once the data has been loaded.
const x0 = d3.scaleBand()
  .range([0, width])
  .round(true)
  .padding(.1);

// x1 lays out the inner columns inside one x0 band; its domain and range are
// both set once the data has been loaded.
const x1 = d3.scaleBand();

const xAxis = d3.axisBottom(x0);

// Vertical scale. There is intentionally no y axis: the stacked blocks are
// separated by white spacer rows, so a continuous axis would be inaccurate.
const y = d3.scaleLinear()
  .range([0, height]);
// Height, in data units, of the white gap stacked on top of every row.
const SPACER_HEIGHT = 1;
// Fill used for the gap segments.
const SPACER_COLOR = "#FFFFFF";

/**
 * Total stacked height of one month: the sum of its row values plus one
 * spacer between each pair of consecutive rows.
 *
 * @param {Array<{value: (string|number)}>} rows - rows belonging to one month.
 * @returns {number} height of the stack in data units.
 */
function stackHeight(rows) {
  const total = rows.reduce(function (sum, row) {
    return sum + Number(row.value);
  }, 0);
  return total + (rows.length - 1) * SPACER_HEIGHT;
}

/**
 * Flattens one month's rows into the list of segments drawn for that month.
 *
 * Rows are stacked in input order. Every row yields one coloured segment per
 * event name in its "--"-separated `group` (all sharing the row's vertical
 * extent), and each coloured segment is immediately followed by a white
 * spacer segment of SPACER_HEIGHT sitting on top of it.
 *
 * @param {Array<{group: string, value: (string|number)}>} rows - rows of one month.
 * @param {function(string): string} color - maps an event name to a fill colour.
 * @returns {Array<Object>} segments carrying value, name, ybegin, yend, i
 *   (position of the name within its group), interrowflg (0 = data segment,
 *   1 = spacer) and color.
 */
function buildStackSegments(rows, color) {
  const segments = [];
  let stackTop = 0;
  rows.forEach(function (row) {
    const rowBottom = stackTop;
    // Number() is used here and in stackHeight() so that the drawn extents
    // and the scale domain are derived from the same parsed value.
    const rowTop = rowBottom + Number(row.value);
    row.group.split("--").forEach(function (name, i) {
      segments.push({
        value: row.value,
        name: name,
        ybegin: rowBottom,
        yend: rowTop,
        i: i,
        interrowflg: 0,
        color: color(name)
      });
      segments.push({
        value: 1,
        name: name,
        ybegin: rowTop,
        yend: rowTop + SPACER_HEIGHT,
        i: i,
        interrowflg: 1,
        color: SPACER_COLOR
      });
    });
    stackTop = rowTop + SPACER_HEIGHT;
  });
  return segments;
}

/**
 * True for the single segment per row that carries the volume callout: the
 * first event name of a data (non-spacer) segment.
 *
 * @param {{i: number, interrowflg: number}} segment
 * @returns {boolean}
 */
function isRowLabel(segment) {
  return segment.i === 0 && segment.interrowflg === 0;
}

// Load the co-occurrence data, shape it into per-month stacks and draw them.
d3.json("featureoverlaptestoutput2.json", function (error, json) {
  if (error) return console.warn(error);

  // One entry per month: {key: month, values: [rows...]}.
  const nd = d3.nest()
    .key(function (d) { return d.month; })
    .entries(json["data"]);

  // Outer columns are the months; inner columns are the individual event
  // names taken from the widest category string.
  const outercolkey = nd.map(function (d) { return d.key; });
  const innercolkey = json["maxcatstring"].split("--").sort();
  const leftmostkey = innercolkey[0];

  // Assign each event name a colour from D3's 20-colour categorical scheme.
  const color = d3.scaleOrdinal(d3.schemeCategory20)
    .domain(innercolkey);

  // Attach the drawable segments to every month and record each stack height.
  const stackHeights = nd.map(function (monthEntry) {
    const rows = monthEntry["values"];
    // Computed before it is assigned to the rows; it used to be read before
    // it had a value, leaving categorysum undefined.
    const categorySum = stackHeight(rows);
    rows.forEach(function (row) { row.categorysum = categorySum; });
    monthEntry.keygroupvalslist = buildStackSegments(rows, color);
    return categorySum;
  });
  // The leading 0 keeps the domain finite when there are no months at all.
  const tallestStack = Math.max(0, ...stackHeights);

  x0.domain(outercolkey);
  // Split each month band evenly between the event names.
  x1.domain(innercolkey)
    .range([0, x0.bandwidth()])
    .round(true);
  // Inverted domain against a [0, height] range: larger values sit higher up.
  y.domain([tallestStack, 0]);

  svg.append("g")
    .attr("class", "x axis")
    .attr("transform", "translate(0," + height + ")")
    .call(xAxis);

  // One group per month, shifted to that month's band.
  const month = svg.selectAll(".month")
    .data(nd)
    .enter().append("g")
    .attr("class", "month")
    .attr("transform", function (d) {
      return "translate(" + x0(d.key) + ",0)";
    });

  // Coloured blocks and white spacers.
  month.selectAll("rect")
    .data(function (d) { return d.keygroupvalslist; })
    .enter().append("rect")
    .attr("width", x1.bandwidth())
    .attr("x", function (d) { return x1(d.name); })
    .attr("y", function (d) { return y(d.yend); })
    .attr("height", function (d) { return y(d.ybegin) - y(d.yend); })
    .style("fill", function (d) { return d.color; });

  // Volume callouts: one per row, vertically centred on the row and placed
  // to the left of the leftmost inner column. Only label-bearing segments are
  // bound so no empty <text> nodes are created.
  month.selectAll("text")
    .data(function (d) { return d.keygroupvalslist.filter(isRowLabel); })
    .enter().append("text")
    .attr("x", function () { return x1(leftmostkey) - 35; })
    .attr("y", function (d) { return y(d.yend) + (y(d.ybegin) - y(d.yend)) / 2; })
    .attr("dy", ".35em")
    .attr("text-anchor", "start")
    .text(function (d) { return d.value; });
});
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment