Built with blockbuilder.org
Nesting and summarizing data are very common tasks in data analysis. I thought it would be nice to view parallel ways of nesting and summarizing with both
- R | `tidyr` and `dplyr`
- JavaScript | `d3.js` ...v4 for fun
To avoid context switching, I'll take advantage of the R package `V8`. If you are an R user, then these `d3.js` gists might be helpful: d3 nest examples and Mister Nester.
# play with nesting with tidyr and d3.js
library(dplyr)
library(tidyr)
library(V8)
# optionally use
# devtools::install_github("timelyportfolio/listviewer")
# library(listviewer)
##### nest mtcars in R ###########
# Base R approach: split() yields a named list holding one data frame per
# distinct cyl value.
split(mtcars, mtcars$cyl)
# tidyr approach: nest() packs every column except cyl into a list-column
# named `data`, which is already a nested result. tbl_df() is applied only
# for the compact printed view.
nested_r <- tbl_df(nest(mtcars, -cyl))
nested_r
# Inspect the nested structure in the viewer. The rename is optional and only
# mirrors the key/values naming used on the d3 side; toJSON() is there to
# verify the structure as JSON before jsonedit() displays it.
listviewer::jsonedit(
  jsonlite::toJSON(
    rename(nested_r, key = cyl, values = data)
  )
)
##### nest mtcars in d3.js ########
# Repeat the nesting in JavaScript, evaluated inside V8 from R.
# Create a JavaScript context to run the code in.
ctx <- v8()
# Load d3-collection, the d3 v4 module used below for nest().
ctx$source("https://d3js.org/d3-collection.v0.1.min.js")
# d3 v3 would be an alternative:
# ctx$source("https://d3js.org/d3.v3.min.js")
# List the context's globals to confirm that d3_collection is present.
ctx$get("Object.keys(global)")
# Copy the mtcars data frame into the JavaScript context as `mtcars`.
ctx$assign("mtcars", mtcars)
# Group the rows by cyl with nest(); tbl_df() is only for the compact view.
nested_d3 <- tbl_df(
  ctx$get(
"d3_collection.nest()
.key(function(d){return d.cyl})
.entries(mtcars)"
  )
)
nested_d3
# Optionally browse the resulting structure.
listviewer::jsonedit(
  jsonlite::toJSON(nested_d3, dataframe = "rows")
)
#### do a simple mean of mpg by cyl #########
# R side: nested_r holds one row per cyl, so inside each group data[[1]] is
# that group's data frame; average its mpg column.
mean_r <- nested_r %>%
  group_by(cyl) %>%
  summarise(mpg = mean(data[[1]]$mpg)) %>%
  ungroup()
mean_r
# add d3-array to get the mean
ctx$source("https://d3js.org/d3-array.v0.7.min.js")
# d3 side: same grouping by cyl, with rollup() reducing each group to the
# mean of its mpg values.
mean_d3 <- ctx$get(
"d3_collection.nest()
.key(function(d){return d.cyl})
.rollup(function(d){
return d3_array.mean(d,function(d){return d.mpg})
})
.entries(mtcars)"
)
mean_d3
# check our answers
# The d3 keys are converted with as.numeric() so they can be joined to the
# numeric cyl column. The comparison uses an explicit tolerance: trunc()
# ignores a `digits` argument for numeric input and would compare only the
# integer parts of the two means.
mean_d3 %>%
  mutate(key = as.numeric(key)) %>%
  full_join(mean_r, by = c("key" = "cyl")) %>%
  mutate(equals = abs(values - mpg) < 1e-5)
`datalib` is an extremely helpful JavaScript data library that is part of `vega`.
library(V8)
## try in datalib
# Start a fresh JavaScript context; this replaces whatever `ctx` was bound to.
ctx <- v8()
# Fetch datalib over HTTPS so the script that gets executed cannot be
# tampered with in transit.
ctx$source("https://vega.github.io/datalib/datalib.min.js")
# Copy the mtcars data frame into the JavaScript context as `mtcars`.
ctx$assign("mtcars", mtcars)
# nest: group the rows by cyl
ctx$get(
"
dl.groupby('cyl')
.execute(mtcars)
"
)
# summarize: mean of mpg within each cyl group
ctx$get(
"
dl.groupby('cyl')
.summarize({'mpg':'mean'})
.execute(mtcars)
"
)