Skip to content

Instantly share code, notes, and snippets.

@geneorama
Last active August 29, 2015 14:22
Show Gist options
  • Save geneorama/7d7ec0a1884f9b4defe2 to your computer and use it in GitHub Desktop.
Save geneorama/7d7ec0a1884f9b4defe2 to your computer and use it in GitHub Desktop.
Reshaping with data.table
## Using example from Arun's presentation:
## https://twitter.com/MattDowle/status/605881443491774464
##------------------------------------------------------------------------------
## INITIALIZE
## Load library, copy melt out of data.table, and create data example
##------------------------------------------------------------------------------
library(data.table) ## v1.9.4
melt <- data.table:::melt.data.table ## Something I usually do, just in case
dt <- structure(list(dad = c("David", "Aaron", "Michael"),
mom = c("Angela", "Anita", "Katya"),
child1_sex = c("M", "F", "F"),
child2_sex = c("F", NA, "F"),
child3_sex = c(NA, NA, "M"),
child1_age = c(8L, 7L, 5L),
child2_age = c(12L, NA, 7L),
child3_age = c(NA, NA, 15L)),
.Names = c("dad", "mom", "child1_sex", "child2_sex",
"child3_sex", "child1_age", "child2_age",
"child3_age"),
class = c("data.table", "data.frame"))
str(dt)
##------------------------------------------------------------------------------
## VERSION 1
## The long way, which makes nice intermediate objects dt_age and dt_sex.
## This version is relatively easy to follow.
##------------------------------------------------------------------------------
dt_age <- melt(data = dt,
id.vars = c("dad", "mom"),
measure.vars = grep(pattern = "age",
x = colnames(dt),
value = TRUE),
value.name = "age",
variable.name = "child")
dt_sex <- melt(data = dt,
id.vars = c("dad", "mom"),
measure.vars = grep(pattern = "sex",
x = colnames(dt),
value = TRUE),
value.name = "sex",
variable.name = "child")
dt_age[ , child := gsub("_age", "", child)]
dt_sex[ , child := gsub("_sex", "", child)]
dt_merged <- merge(dt_sex, dt_age, by=c("dad", "mom", "child"))
dt_merged
##------------------------------------------------------------------------------
## VERSION 2
## This is really just version 1 crammed into one command... but this sort
## of thing is useful to me when I really trust the process and want to
## minimize intermediate objects.
##------------------------------------------------------------------------------
dt_merged <- merge(x = melt(dt, id.vars=c("dad", "mom"),
measure.vars=grep("sex", colnames(dt), value=TRUE),
value.name="sex", variable.name="child")[
i=TRUE, child := gsub("_sex", "", child)],
y = melt(dt, id.vars=c("dad", "mom"),
measure.vars=grep("age", colnames(dt), value=TRUE),
value.name="age", variable.name="child")[
i=TRUE, child := gsub("_age", "", child)],
by = c("dad", "mom", "child"))
dt_merged
##------------------------------------------------------------------------------
## VERSION 3
## If you wanted to do this often...
##------------------------------------------------------------------------------
fn <- function(dat, x){
melt(dat, id.vars=c("dad", "mom"),
measure.vars=grep(x, colnames(dat), value=TRUE),
value.name=x, variable.name="child")[
i=TRUE, child := gsub(paste0("_",x), "", child)]
}
dt_merged <- merge(x = fn(dt, "age"),
y = fn(dt, "sex"),
by = c("dad", "mom", "child"))
dt_merged
##------------------------------------------------------------------------------
## VERSION 4
## If you wanted to abstract the process
##------------------------------------------------------------------------------
ffn <- function(dat, var, key, subkey){
ret <- melt(dat, id.vars = key,
measure.vars=grep(var, colnames(dat), value=TRUE),
value.name=var, variable.name=subkey)
ret[ , eval(subkey):= gsub(paste0("_",var,"$"), "", eval(as.name(subkey)))]
return(ret)
}
dt_merged <- merge(x = ffn(dat=dt, var="age", key=c("dad","mom"), subkey="child"),
y = ffn(dat=dt, var="sex", key=c("dad","mom"), subkey="child"),
by = c("dad", "mom", "child"))
dt_merged
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment