# Modifying objects in loops in R, and other speed pitfalls
# GitHub gist wch/7107695 (last active January 31, 2023)
# =============================================
# Modifying a data frame in place
# =============================================
# ggplot2 provides the `diamonds` data set used by the timings in this section.
library(ggplot2)
# Print the structure of `diamonds` (its columns and their types).
str(diamonds)
# Modify data frame in place, in loop.
# Each iteration assigns a single element through the data frame itself
# (`d$carat[i] <- i`). This is super, super slow.
system.time({
  d <- diamonds
  # seq_along() instead of 1:length(): also correct for a zero-row input.
  for (i in seq_along(d$carat)) {
    d$carat[i] <- i
  }
})
# Modify separate vector in loop, then assign back into data frame.
# The column is pulled out once, updated element by element as a plain
# vector, and written back with a single assignment.
system.time({
  d <- diamonds
  carat <- d$carat
  # seq_along() instead of 1:length(): also correct for an empty vector.
  for (i in seq_along(carat)) {
    carat[i] <- i
  }
  d$carat <- carat
})
# Modify separate, growing vector in loop, then assign back into data frame.
# `carat` starts empty and is extended by one element per iteration
# (growing is intentional here: it is the pattern being timed).
system.time({
  d <- diamonds
  carat <- numeric()
  # seq_len(nrow(d)) instead of 1:nrow(d): also correct for zero rows.
  for (i in seq_len(nrow(d))) {
    carat[i] <- i
  }
  d$carat <- carat
})
# Use within() to modify data frame.
# The loop runs inside within()'s evaluation environment, where `carat`
# is an ordinary vector; within() then writes the variables left in that
# environment back into a copy of the data frame.
system.time({
  d <- within(diamonds, {
    for (i in seq_along(carat)) {
      carat[i] <- i
    }
    # Drop the loop index: otherwise within() would add it to the result
    # as a stray column named `i`.
    rm(i)
  })
})
# Converting data frame to list and doing assignment is much faster.
# The element-wise assignment is the same expression as in the in-place
# data frame example, but `d` is a plain list while the loop runs; it is
# converted back to a data frame once at the end.
system.time({
  d <- as.list(diamonds)
  # seq_along() instead of 1:length(): also correct for an empty column.
  for (i in seq_along(d$carat)) {
    d$carat[i] <- i
  }
  d <- as.data.frame(d)
})
# Vectorized assignment.
# The whole column is replaced in one step; no loop at all.
system.time({
  d <- diamonds
  d$carat <- seq_along(d$carat)
})
# =============================================
# For loops, lapply, and vapply
# =============================================
# Grow a vector in place with a for loop.
# `v` starts empty and gains one element per iteration (1e5 elements).
system.time({
  v <- numeric()
  for (i in 1:1e5) {
    v[i] <- i * 2
  }
})
# lapply (returns list - need to convert to atomic vector).
# Same computation as the growing-vector loop, 1e5 elements.
system.time({
  v <- lapply(1:1e5, function(x) x * 2)
  v <- unlist(v)
})
# Modify pre-allocated vector in place with a for loop.
# Note this is 10x larger than previous examples (1e6 elements).
system.time({
  v <- numeric(1e6)
  for (i in 1:1e6) {
    v[i] <- i * 2
  }
})
# lapply (returns list - need to convert to atomic vector).
# Same size as the pre-allocated loop: 1e6 elements.
system.time({
  v <- lapply(1:1e6, function(x) x * 2)
  v <- unlist(v)
})
# vapply (returns atomic vector).
# FUN.VALUE = numeric(1) declares that every call returns one double, so
# no unlist() step is needed afterwards.
system.time({
  v <- vapply(1:1e6, function(x) x * 2, FUN.VALUE = numeric(1))
})
# Vectorized computation.
# The multiplication is applied to the whole 1e6-element sequence at once.
system.time({
  v <- (1:1e6) * 2
})
# =============================================
# Modifying lists in place
# =============================================
# Grow a list in place with a for loop.
# `v` starts as an empty list and gains one element per iteration.
system.time({
  v <- list()
  for (i in 1:1e5) {
    v[[i]] <- i * 2
  }
})
# lapply.
# Builds the same 1e5-element list directly; the result is kept as a list.
system.time({
  v <- lapply(1:1e5, function(x) x * 2)
})
# Modify pre-allocated list in place with a for loop.
# The list is created at its final length (1e5 elements) before the loop,
# so the loop only overwrites existing elements.
system.time({
  v <- as.list(numeric(1e5))
  for (i in 1:1e5) {
    v[[i]] <- i * 2
  }
})
# Modify pre-allocated vector in place with a for loop.
# Same loop as the pre-allocated list example, including `[[<-`, but `v`
# is an atomic numeric vector rather than a list.
system.time({
  v <- numeric(1e5)
  for (i in 1:1e5) {
    v[[i]] <- i * 2
  }
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment