Last active
December 11, 2015 17:58
-
-
Save joshbode/4638136 to your computer and use it in GitHub Desktop.
SQL analytic-like functions in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(plyr) | |
# compute columns - similar to transform and plyr::mutate | |
compute = function(.data, ..., .append=FALSE) { | |
cols = as.list(substitute(list(...))[-1]) | |
cols = cols[names(cols) != ''] | |
env = parent.frame() | |
# evaluate the columns | |
for (name in names(cols)) { | |
.data[[name]] = eval(cols[[name]], .data, env) | |
} | |
if (!.append) { | |
# copy input data to result if required in output | |
.data = .data[names(cols)] | |
} | |
return(.data) | |
} | |
# test data | |
n = 10000 | |
d = data.frame(x=0:(n-1) %/% 5 + 1, y=sample(1:(2*n), n), z=trunc(runif(n, 1, 4))) | |
# rank the data, partitioned by x, ordered by z, y | |
print(system.time({ | |
r = unlist(dlply(d, .(x), function(r) { | |
order(with(r, order(z, y))) | |
})) | |
})) | |
# same, but with additional calculations, returning all cols in d | |
print(system.time({ | |
d = ddply(d, .(x), transform, | |
r=order(order(z, y)), | |
s=order(order(y)), | |
u=max(y) | |
) | |
})) | |
print(system.time({ | |
d = ddply(d, .(x), mutate, | |
r=order(order(z, y)), | |
s=order(order(y)), | |
u=max(y) | |
) | |
})) | |
print(system.time({ | |
d = ddply(d, .(x), compute, | |
r=order(order(z, y)), | |
s=order(order(y)), | |
u=min(y), | |
l=u | |
) | |
})) | |
print(system.time({ | |
d = ddply(d, .(x), compute, | |
r=order(order(z, y)), | |
s=order(order(y)), | |
u=min(y), | |
.append=TRUE | |
) | |
})) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment