Last active
August 29, 2015 14:22
-
-
Save geneorama/7d7ec0a1884f9b4defe2 to your computer and use it in GitHub Desktop.
Reshaping with data.table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Using example from Arun's presentation: | |
## https://twitter.com/MattDowle/status/605881443491774464 | |
##------------------------------------------------------------------------------ | |
## INITIALIZE | |
## Load library, copy melt out of data.table, and create data example | |
##------------------------------------------------------------------------------ | |
library(data.table) ## v1.9.4 | |
melt <- data.table:::melt.data.table ## Something I usually do, just in case | |
dt <- structure(list(dad = c("David", "Aaron", "Michael"), | |
mom = c("Angela", "Anita", "Katya"), | |
child1_sex = c("M", "F", "F"), | |
child2_sex = c("F", NA, "F"), | |
child3_sex = c(NA, NA, "M"), | |
child1_age = c(8L, 7L, 5L), | |
child2_age = c(12L, NA, 7L), | |
child3_age = c(NA, NA, 15L)), | |
.Names = c("dad", "mom", "child1_sex", "child2_sex", | |
"child3_sex", "child1_age", "child2_age", | |
"child3_age"), | |
class = c("data.table", "data.frame")) | |
str(dt) | |
##------------------------------------------------------------------------------ | |
## VERSION 1 | |
## The long way, which makes nice intermediate objects dt_age and dt_sex. | |
## This version is relatively easy to follow. | |
##------------------------------------------------------------------------------ | |
dt_age <- melt(data = dt, | |
id.vars = c("dad", "mom"), | |
measure.vars = grep(pattern = "age", | |
x = colnames(dt), | |
value = TRUE), | |
value.name = "age", | |
variable.name = "child") | |
dt_sex <- melt(data = dt, | |
id.vars = c("dad", "mom"), | |
measure.vars = grep(pattern = "sex", | |
x = colnames(dt), | |
value = TRUE), | |
value.name = "sex", | |
variable.name = "child") | |
dt_age[ , child := gsub("_age", "", child)] | |
dt_sex[ , child := gsub("_sex", "", child)] | |
dt_merged <- merge(dt_sex, dt_age, by=c("dad", "mom", "child")) | |
dt_merged | |
##------------------------------------------------------------------------------ | |
## VERSION 2 | |
## This is really just version 1 crammed into one command... but this sort | |
## of thing is useful to me when I really trust the process and want to | |
## minimize intermediate objects. | |
##------------------------------------------------------------------------------ | |
dt_merged <- merge(x = melt(dt, id.vars=c("dad", "mom"), | |
measure.vars=grep("sex", colnames(dt), value=TRUE), | |
value.name="sex", variable.name="child")[ | |
i=TRUE, child := gsub("_sex", "", child)], | |
y = melt(dt, id.vars=c("dad", "mom"), | |
measure.vars=grep("age", colnames(dt), value=TRUE), | |
value.name="age", variable.name="child")[ | |
i=TRUE, child := gsub("_age", "", child)], | |
by = c("dad", "mom", "child")) | |
dt_merged | |
##------------------------------------------------------------------------------ | |
## VERSION 3 | |
## If you wanted to do this often... | |
##------------------------------------------------------------------------------ | |
fn <- function(dat, x){ | |
melt(dat, id.vars=c("dad", "mom"), | |
measure.vars=grep(x, colnames(dat), value=TRUE), | |
value.name=x, variable.name="child")[ | |
i=TRUE, child := gsub(paste0("_",x), "", child)] | |
} | |
dt_merged <- merge(x = fn(dt, "age"), | |
y = fn(dt, "sex"), | |
by = c("dad", "mom", "child")) | |
dt_merged | |
##------------------------------------------------------------------------------ | |
## VERSION 4 | |
## If you wanted to abstract the process | |
##------------------------------------------------------------------------------ | |
ffn <- function(dat, var, key, subkey){ | |
ret <- melt(dat, id.vars = key, | |
measure.vars=grep(var, colnames(dat), value=TRUE), | |
value.name=var, variable.name=subkey) | |
ret[ , eval(subkey):= gsub(paste0("_",var,"$"), "", eval(as.name(subkey)))] | |
return(ret) | |
} | |
dt_merged <- merge(x = ffn(dat=dt, var="age", key=c("dad","mom"), subkey="child"), | |
y = ffn(dat=dt, var="sex", key=c("dad","mom"), subkey="child"), | |
by = c("dad", "mom", "child")) | |
dt_merged | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment