Skip to content

Instantly share code, notes, and snippets.

@esjewett
Last active March 29, 2020 19:02
Show Gist options
  • Save esjewett/8291936a4e4054342ba4 to your computer and use it in GitHub Desktop.
Save esjewett/8291936a4e4054342ba4 to your computer and use it in GitHub Desktop.
Example of Reductio/Crossfilter moving average w/ efficient Date calculations for larger data set
// This example data set (not provided) is over 100,000 records. The following code calculates 30-day moving
// averages (over 3 million aggregations) using Crossfilter and the Reductio helper library. It takes about
// 3 seconds for the initial aggregation in Chrome Canary (42.0.2291.0) on a 2.3 GHz Core i7, mid-2012 rMBP.
d3.csv('dataJan-29-2015.csv', function (data) {
// Normalize every CSV row in place before it is handed to Crossfilter:
//   - DollarValue becomes a Number (NaN when the cell is not numeric).
//   - ShipDate becomes a "YYYY-MM-DD" string, or "" when the cell is missing or
//     does not match the expected ISO timestamp format.
//   - SalespersonName falls back to SalespersonNo when blank or flagged obsolete.
var parseDate = d3.time.format("%Y-%m-%dT%H:%M:%S").parse;
var ymd = d3.time.format("%Y-%m-%d");
data.forEach(function(d) {
  d.DollarValue = parseFloat(d.DollarValue);

  // Strip a trailing "+hh:mm" offset; only the calendar date is kept.
  // parse() yields null when the text does not match the format, so guard
  // against handing null to the formatter (and against a missing column).
  // Unusable dates are stored as "" so later truthiness checks can skip them.
  var shipped = d.ShipDate ? parseDate(d.ShipDate.split("+")[0]) : null;
  d.ShipDate = shipped ? ymd(shipped) : "";

  // fix obsolete or missing salesperson names
  if (d.SalespersonName.indexOf('do not use') > -1 || d.SalespersonName.indexOf('DO NOT USE') > -1 || d.SalespersonName === "") {
    d.SalespersonName = d.SalespersonNo;
  }
});
// Load the normalized rows into Crossfilter.
var cs_data = crossfilter(data);

// Key accessors. Dimension values are always coerced to strings so that every
// key in a dimension has the same data type.
var shipDateKey = function(row) {
  return "" + row.ShipDate;
};
var countryKey = function(row) {
  return "" + row.ShipToCountryCode;
};

var ShipDateDim = cs_data.dimension(shipDateKey);
var countryDim = cs_data.dimension(countryKey);

// Total DollarValue per ship-to country.
var dollarValue = dc.pluck('DollarValue');
var country_total = countryDim.group().reduceSum(dollarValue);
// Scratch variables used in the group function are declared *outside* of the
// function and reused on every call.
var arr = [];
var tempDate = null;
var i;
var tempStr = "";

// Maps one record to the keys of every moving-average bucket it belongs to:
// its own ship date plus the 29 calendar days that follow, 30 consecutive
// "YYYY-MM-DD" strings in ascending order.
//
// record.ShipDate is expected to be a "YYYY-MM-DD" string. When it is not
// populated (empty/undefined) an empty array is returned.
// A fresh array is returned on every call.
var groupFunction = function(record) {
  arr = [];
  // If the relevant field isn't populated, don't bother looping.
  if(record.ShipDate) {
    // Manually parse dates - don't use the d3 date formatters. They are up to 10x slower.
    // (Date months are 0-based, hence the -1.)
    tempDate = new Date(+record.ShipDate.slice(0,4), +record.ShipDate.slice(5,7)-1, +record.ShipDate.slice(8,10));
    for(i=0; i<30; i++) {
      // Again, don't use the date formatters. Be careful to get the right number of digits in months & days.
      tempStr = tempDate.getFullYear() + '-' +
        (tempDate.getMonth()<9 ? ('0' + (tempDate.getMonth()+1)) : (tempDate.getMonth()+1)) +
        '-' +
        (tempDate.getDate()<10 ? ('0' + (tempDate.getDate())) : (tempDate.getDate()));
      arr.push(tempStr);
      // Advance by exactly ONE day per iteration. tempDate is mutated in place, so
      // adding the loop index here would accumulate (0, 1, 3, 6, 10, ... days) and
      // scatter the record over the wrong dates instead of 30 consecutive ones.
      // Javascript Date wraps at month- and year-end (Dec 32 is Jan 1 of the following year)
      tempDate.setDate(tempDate.getDate() + 1);
    }
  }
  return arr;
};
// Runs one benchmark step between console.time/console.timeEnd calls that share
// the given label, and passes the step's return value through.
var timed = function(label, step) {
  console.time(label);
  var outcome = step();
  console.timeEnd(label);
  return outcome;
};

// Reductio reducer configured with groupFunction as its groupAll key function,
// tracking a count, a sum of the numeric DollarValue, and an average.
var reducer = reductio()
  .groupAll(groupFunction)
  .count(true)
  .sum(function(d) { return +d.DollarValue; })
  .avg(true);

var shipments_moving_avg;

// Each step is timed individually; the outer timer covers the whole sequence.
timed("Total Crossfilter calculation", function() {
  shipments_moving_avg = timed("Build initial groupAll", function() {
    return ShipDateDim.groupAll();
  });

  timed("Apply reducers", function() {
    reducer(shipments_moving_avg);
  });

  // First read of the group value.
  timed("groupAll.value()", function() {
    shipments_moving_avg.value();
  });

  // Second read with nothing changed in between.
  timed("groupAll.value() second time", function() {
    shipments_moving_avg.value();
  });

  timed("filter on country", function() {
    countryDim.filter("Canada");
  });

  // Read again now that a filter is applied on another dimension.
  timed("groupAll.value() after filter", function() {
    shipments_moving_avg.value();
  });
});
});
@chrisesmith916
Copy link

Hi Ethan, Any chance we could get a list of the columns that were in the dataset?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment