Last active
August 29, 2015 14:01
-
-
Save skranz/85ba6dbe406a13d3e262 to your computer and use it in GitHub Desktop.
modify, a function that wraps data.table for quick replacement of values in selected rows
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(data.table) | |
# Returns R's empty symbol, i.e. the object that stands for an omitted
# argument in a call such as dt[, j]. Useful when building such calls
# programmatically with call().
EmptySymbol <- function() {
  quote(expr = )
}
# Builds (but does not evaluate) a call of the form
#   dat.quote[filter.call, `:=`(...), by = by]
#
# args        list of unevaluated expressions, named by target column
# filter.call unevaluated row condition, or NULL to leave the row slot empty
# by          value for the `by` argument; omitted from the call when NULL
# dat.quote   expression that refers to the table inside the generated call
#
# Returns the constructed call object.
get.data.table.modify.call <- function(args=NULL, filter.call=NULL, by=NULL, dat.quote=quote(dt)) {
  # A single assignment is written as `:=`("name", value); anything else is
  # passed to `:=` as a set of (named) arguments.
  assign.expr <- if (length(args) == 1) {
    call(":=", names(args)[1], args[[1]])
  } else {
    as.call(c(list(quote(`:=`)), args))
  }

  # The empty symbol cannot be held in a local variable, so the two variants
  # of the subsetting call are constructed separately.
  subset.call <- if (is.null(filter.call)) {
    call("[", dat.quote, EmptySymbol(), assign.expr)
  } else {
    call("[", dat.quote, filter.call, assign.expr)
  }

  if (!is.null(by)) {
    subset.call$by <- by
  }
  subset.call
}
# modify: a verb that replaces values in (optionally filtered) rows of a data
# set by building and evaluating a data.table `:=` call.
#
# .data    unquoted NAME of the variable that holds the data; the variable is
#          looked up in .envir
# ...      named expressions `column = value`; they are captured unevaluated
#          and handed to `:=`
# .filter  optional unquoted row condition; when missing, the row slot of the
#          data.table call is left empty
# .by      optional value passed on as the `by` argument of the call
# .envir   environment in which the data variable is looked up, in which the
#          expressions resolve non-column variables, and in which the result is
#          assigned when the input is not a data.table
#
# If the input is not a data.table it is converted with as.data.table(),
# modified, converted back (as.tbl() for tbl objects, as(., class) otherwise)
# and assigned to the variable name in .envir. A data.table input is passed to
# the `:=` call as is and is not re-assigned.
#
# Returns the resulting data invisibly.
modify = function(.data,...,.filter,.by=NULL, .envir=parent.frame()) {
  .data = substitute(.data)
  if (!is.name(.data)) {
    # as.character() on a compound expression yields several strings and
    # get() would then fail with an obscure message.
    stop("modify: .data must be the unquoted name of a variable, got: ",
         paste(deparse(.data), collapse = " "), call. = FALSE)
  }
  args = eval(substitute(alist(...)))
  data.var = as.character(.data)
  dat = get(data.var, .envir)

  was.dt = is.data.table(dat)
  dt = if (was.dt) dat else as.data.table(dat)

  filter.call = if (missing(.filter)) NULL else substitute(.filter)

  # Evaluate the generated call in a child of .envir rather than in this
  # function's frame: the locals used here (args, dat, dt, ca, ...) then cannot
  # shadow variables used in the caller's expressions, and variables local to
  # the caller become visible. The table is bound under a name that is
  # unlikely to collide with a caller variable.
  # NOTE(review): if modify() is ever called from a package namespace that does
  # not import data.table, confirm that `[` still gets data.table semantics.
  tab.name = ".modify.dt"
  ca = get.data.table.modify.call(args=args, by=.by, filter.call=filter.call,
                                  dat.quote=as.name(tab.name))
  eval.env = new.env(parent = .envir)
  assign(tab.name, dt, envir = eval.env)
  eval(ca, eval.env)
  # Read the table back in case the call replaced the binding in eval.env
  # (presumably only when data.table has to re-allocate the table).
  dt = get(tab.name, envir = eval.env, inherits = FALSE)

  if (was.dt) {
    dat = dt
  } else {
    if (is.tbl(dat)) {
      dat = as.tbl(dt)
    } else {
      dat = as(dt, class(dat))
    }
    assign(data.var, dat, .envir)
  }
  invisible(dat)
}
# Example / benchmark: compares modify() on a tbl, a data.frame and a
# data.table with a direct data.table update and with dplyr::mutate() +
# ifelse(). Meant to be run by hand; needs the dplyr, data.table and
# microbenchmark packages. Returns the microbenchmark result.
examples.modify = function() {
  library(dplyr)
  library(data.table)

  n = 1e6
  df = data.frame(a= sample(1:5,n,replace=TRUE),
                  b= sample(1:100,n,replace=TRUE),
                  x=rnorm(n))
  dt = as.data.table(df)
  tbl = as.tbl(df)
  #modify(tbl, x = x+100,.filter=a==2)

  # microbenchmark is never attached in this file, so the call is qualified
  # with its namespace; an unqualified call fails with
  # "could not find function".
  microbenchmark::microbenchmark(times = 5L,
    modify(tbl, x = x+100,.filter=a==2),
    modify(df, x = x+100,.filter=a==2),
    modify(dt, x = x+100,.filter=a==2),
    dt[a==2,x:=x+100],
    mutate.df = mutate(df, x=ifelse(a==2,x+1,x)),
    mutate.tbl = mutate(tbl, x=ifelse(a==2,x+1,x))
  )
  ## Results
  ## Unit: milliseconds
  ## expr min lq median uq max neval
  ## modify(tbl, x = x + 100, .filter = a == 2) 54.96051 56.38434 61.31385 61.50925 67.71102 5
  ## modify(df, x = x + 100, .filter = a == 2) 65.90863 85.99066 86.04277 86.13829 91.89006 5
  ## modify(dt, x = x + 100, .filter = a == 2) 50.61876 56.41078 61.40109 61.45043 61.79544 5
  ## dt[a == 2, `:=`(x, x + 100)] 50.85836 56.50513 60.78332 61.05451 67.58312 5
  ## mutate.df 794.84943 821.20072 827.20788 837.43679 849.53164 5
  ## mutate.tbl 788.86398 802.19081 835.90954 843.26593 899.45605 5
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment