Last active
March 29, 2020 19:02
-
-
Save esjewett/8291936a4e4054342ba4 to your computer and use it in GitHub Desktop.
Example of Reductio/Crossfilter moving average w/ efficient Date calculations for larger data set
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This example data set (not provided) is over 100,000 records. The following code calculates 30-day moving
// averages (over 3 million aggregations) using Crossfilter and the Reductio helper library. It takes about
// 3 seconds for the initial aggregation in Chrome Canary (42.0.2291.0) on a 2.3 GHz Core i7, mid-2012 rMBP.
d3.csv('dataJan-29-2015.csv', function (data) { | |
// Normalise the raw CSV rows in place, then index them with Crossfilter.
// ISO timestamps are reduced to "YYYY-MM-DD" day strings.
var parseDate = d3.time.format("%Y-%m-%dT%H:%M:%S").parse;
var ymd = d3.time.format("%Y-%m-%d");
data.forEach(function(d) {
  d.DollarValue = parseFloat(d.DollarValue);

  // Drop anything from the first "+" onwards (e.g. a UTC offset suffix) before parsing.
  // d3's parse returns null for text it cannot parse, and formatting null throws,
  // so a missing or unparseable ShipDate is normalised to "" instead of aborting the
  // whole load. Downstream code already skips records whose ShipDate is empty.
  var shipDate = d.ShipDate ? parseDate(d.ShipDate.split("+")[0]) : null;
  d.ShipDate = shipDate ? ymd(shipDate) : "";

  // Fix obsolete or missing salesperson names by falling back to the salesperson number.
  if (d.SalespersonName.indexOf('do not use') > -1 || d.SalespersonName.indexOf('DO NOT USE') > -1 || d.SalespersonName === "") {
    d.SalespersonName = d.SalespersonNo;
  }
});

var cs_data = crossfilter(data);

// Always force dimension values into the right data type (here: strings).
var ShipDateDim = cs_data.dimension(function(d) { return "" + d.ShipDate; });
var countryDim = cs_data.dimension(function(d) { return "" + d.ShipToCountryCode; });
var country_total = countryDim.group().reduceSum(dc.pluck('DollarValue'));
// Scratch variables used by groupFunction. They are declared *outside* of the
// function on purpose (the original author's choice for this hot path, which
// runs once per record).
var WINDOW_DAYS = 30; // Length of the moving-average window, in days.
var arr = [];
var tempDate = null;
var i;
var tempStr = "";
var month;
var day;

/**
 * Lists the day keys of the moving-average window that a record belongs to.
 *
 * A record shipped on day D is returned under D, D+1, ..., D+(WINDOW_DAYS-1),
 * i.e. WINDOW_DAYS consecutive calendar days starting at its ship date.
 *
 * @param {{ShipDate: string}} record - Row whose ShipDate is a "YYYY-MM-DD" string (or empty).
 * @returns {string[]} A fresh array of "YYYY-MM-DD" keys; empty when ShipDate is not populated.
 */
var groupFunction = function(record) {
  arr = [];
  // If the relevant field isn't populated, don't bother looping.
  if (record.ShipDate) {
    // Manually parse dates - don't use the d3 date formatters. They are up to 10x slower.
    tempDate = new Date(+record.ShipDate.slice(0, 4), +record.ShipDate.slice(5, 7) - 1, +record.ShipDate.slice(8, 10));
    for (i = 0; i < WINDOW_DAYS; i++) {
      // tempDate is mutated in place, so step forward exactly ONE day per iteration
      // after the first. (Adding the loop index here would compound the offsets to
      // 0, 1, 3, 6, 10, ... days instead of consecutive days.)
      // JavaScript Date wraps at month- and year-end (Dec 32 is Jan 1 of the following year).
      if (i > 0) {
        tempDate.setDate(tempDate.getDate() + 1);
      }
      // Again, don't use the date formatters. Be careful to get the right number of digits in months & days.
      month = tempDate.getMonth() + 1;
      day = tempDate.getDate();
      tempStr = tempDate.getFullYear() + '-' +
        (month < 10 ? ('0' + month) : month) +
        '-' +
        (day < 10 ? ('0' + day) : day);
      arr.push(tempStr);
    }
  }
  return arr;
};
// Reductio reducer: groups each record under the keys returned by groupFunction and
// is configured with count, a sum over DollarValue (coerced to a number), and avg.
var reducer = reductio()
  .groupAll(groupFunction)
  .count(true)
  .sum(function(d) { return +d.DollarValue; })
  .avg(true);

// Runs `work` between console.time(label) and console.timeEnd(label) and returns its result.
var timed = function(label, work) {
  console.time(label);
  var result = work();
  console.timeEnd(label);
  return result;
};

// The outer timer spans every step below.
console.time("Total Crossfilter calculation");

var shipments_moving_avg = timed("Build initial groupAll", function() {
  return ShipDateDim.groupAll();
});

timed("Apply reducers", function() {
  reducer(shipments_moving_avg);
});

// value() is timed twice in a row: first call, then an immediate repeat.
timed("groupAll.value()", function() {
  shipments_moving_avg.value();
});
timed("groupAll.value() second time", function() {
  shipments_moving_avg.value();
});

// NOTE(review): countryDim is keyed on ShipToCountryCode - confirm that "Canada" is
// the value actually stored in that column (rather than a code such as "CA").
timed("filter on country", function() {
  countryDim.filter("Canada");
});
timed("groupAll.value() after filter", function() {
  shipments_moving_avg.value();
});

console.timeEnd("Total Crossfilter calculation");
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi Ethan, Any chance we could get a list of the columns that were in the dataset?