Responses to my tweet: "Help me use dplyr to do this less awkwardly? #rstats"
# Within each cyl group, count the rows where vs is 0 and where vs is 1.
# Each level of vs is written out by hand as its own summary column.
mtcars %>%
  group_by(cyl) %>%
  summarize(
    vs0 = sum(vs == 0),
    vs1 = sum(vs == 1)
  )
## Source: local data frame [3 x 3]
##
## cyl vs0 vs1
## 1 4 1 10
## 2 6 3 4
## 3 8 14 0
Later clarified: "imagine vs had 7 levels instead of 2 … how to do for a
general factor … general cross-tabulation"
The #rstats twitterati delivered some great answers!
library(dplyr)
library(tidyr) ## spread()
library(reshape2) ## dcast(), melt()
library(data.table)
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:dplyr':
##
## between, last
@noamross
# Count rows for every (cyl, vs) pair in long form, then spread the vs
# levels out into one column each. Pairs that never occur show up as NA.
mtcars %>%
  group_by(cyl, vs) %>%
  summarize(count = n()) %>%
  spread(key = vs, value = count)
## Source: local data frame [3 x 3]
##
## cyl 0 1
## 1 4 1 10
## 2 6 3 4
## 3 8 14 NA
@tjmahr: "skip summarise(count = n()) and use tally() or count()"
# Same cross-tabulation, with tally() standing in for summarize(count = n()).
# tally() names its count column n, which is what gets spread.
mtcars %>%
  group_by(cyl, vs) %>%
  tally() %>%
  spread(key = vs, value = n)
## Source: local data frame [3 x 3]
##
## cyl 0 1
## 1 4 1 10
## 2 6 3 4
## 3 8 14 NA
@dev_dmu
# count(cyl, vs) does the grouping and the counting in a single step;
# the resulting n column is then spread across the levels of vs.
mtcars %>%
  count(cyl, vs) %>%
  spread(key = vs, value = n)
## Source: local data frame [3 x 3]
##
## cyl 0 1
## 1 4 1 10
## 2 6 3 4
## 3 8 14 NA
@tylerrinker
# reshape2-only approach: keep just cyl and vs, melt to long form with cyl
# as the id column, then cast back to wide with one column per observed
# value. No aggregation function is supplied, so dcast() falls back to
# length (i.e. row counts) and reports that in a message.
# `id.vars` is spelled out in full rather than relying on partial matching
# of `id`.
dcast(
  melt(mtcars[, c("cyl", "vs")], id.vars = "cyl"),
  cyl ~ value
)
## Aggregation function missing: defaulting to length
## cyl 0 1
## 1 4 1 10
## 2 6 3 4
## 3 8 14 0
@daroczig: dplyr + reshape2
# Count with dplyr, reshape with reshape2. The summary column is left
# unnamed (so it is called "n()"), and dcast() is left to guess it as the
# value column, which it announces in a message.
mtcars %>%
  group_by(cyl, vs) %>%
  summarize(n()) %>%
  dcast(formula = cyl ~ vs)
## Using n() as value column: use value.var to override.
## cyl 0 1
## 1 4 1 10
## 2 6 3 4
## 3 8 14 NA
@daroczig: data.table + reshape2
# Convert mtcars to a data.table once; mtcarsDT is reused further down.
mtcarsDT <- mtcars %>% data.table()

# .N is the per-group row count for each (cyl, vs) pair; dcast() then
# widens the long counts, guessing N as the value column.
dcast(
  mtcarsDT[, .N, by = list(cyl, vs)],
  formula = cyl ~ vs
)
## Using N as value column: use value.var to override.
## cyl 0 1
## 1 4 1 10
## 2 6 3 4
## 3 8 14 NA
@MattDowle: data.table, reshape2, and %>%
# Same data.table count, using the .() shorthand for the grouping columns
# and piping the long result straight into dcast().
mtcarsDT[, .N, by = .(cyl, vs)] %>%
  dcast(formula = cyl ~ vs)
## Using N as value column: use value.var to override.
## cyl 0 1
## 1 4 1 10
## 2 6 3 4
## 3 8 14 NA
@dev_dmu bonus content: proportions!
# Proportions in long form. The counted result is still grouped by cyl,
# so prop.table(n) yields the share of each vs level within its cyl group.
mtcars %>%
  count(cyl, vs) %>%
  # Equivalent spelling: mutate(prop = n / sum(n))
  mutate(prop = prop.table(n))
## Source: local data frame [5 x 4]
## Groups: cyl
##
## cyl vs n prop
## 1 4 0 1 0.09090909
## 2 4 1 10 0.90909091
## 3 6 0 3 0.42857143
## 4 6 1 4 0.57142857
## 5 8 0 14 1.00000000
# Proportions in wide form: compute the within-cyl share of each vs level,
# drop the raw counts, then spread the vs levels into columns.
mtcars %>%
  count(cyl, vs) %>%
  mutate(prop = prop.table(n)) %>%
  select(-n) %>%
  spread(key = vs, value = prop)
## Source: local data frame [3 x 3]
##
## cyl 0 1
## 1 4 0.09090909 0.9090909
## 2 6 0.42857143 0.5714286
## 3 8 1.00000000 NA