-
-
Save dchakro/8b1e97ba6853563dd0bb5b7be2317692 to your computer and use it in GitHub Desktop.
Parallelize RDS compression/decompression to improve serialization performance in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Modified from: https://gist.github.com/retrography/359e0cc56d2cf1acd161b5645bc801a8
# The functions below use pigz, a parallelized implementation of gzip,
# to improve compression/decompression performance of RDS serialization in R.
# Each function searches for the appropriate program (based on the required
# compression format) and, if found, offloads the compression handling to the
# external program, which leaves R free to do the data import/export.
# The functions have only been tested on macOS, but probably also work on other *nix systems.
# Requires the external program pigz.
# Run the following line at the command prompt before using these functions.
#
# brew install pigz
library(parallel)
#' Serialize an object to a connection and always close the connection.
#'
#' Warnings and errors raised by `base::saveRDS()` are propagated to the
#' caller instead of being printed and swallowed, so a failed write can no
#' longer be mistaken for a successful one, and a mere warning no longer
#' aborts the write half-way through.
#'
#' @param object R object to serialize.
#' @param con A connection to write to (for example a `pipe()` or `file()`
#'   connection). It is closed when this function exits, even on error.
#' @return `NULL`, invisibly (the value of `base::saveRDS()`).
writeRDS <- function(object, con) {
  on.exit({
    # close() reports an exit status for some connection types (e.g. pipes);
    # a non-zero value means the process behind the connection failed.
    status <- close(con)
    if (is.numeric(status) && length(status) == 1L &&
        !is.na(status) && status != 0) {
      warning("connection closed with non-zero status ", status,
              call. = FALSE)
    }
  }, add = TRUE)
  base::saveRDS(object, file = con)
}
#' Read a serialized object from a connection and always close the connection.
#'
#' Errors raised by `base::readRDS()` are propagated to the caller. Previously
#' they were printed and the printed text was returned, so callers received an
#' "ERROR: ..." string in place of the object they asked for.
#'
#' @param con A connection to read from (for example a `pipe()` or `gzfile()`
#'   connection). It is closed when this function exits, even on error.
#' @return The deserialized R object.
loadRDS <- function(con) {
  # on.exit releases the connection on both the success and the error path.
  on.exit(close(con), add = TRUE)
  base::readRDS(file = con)
}
#' Save an object as a gzip-compressed RDS file, using pigz when available.
#'
#' When `file` is a single path and the `pigz` program is found, the
#' serialized stream is piped through `pigz` so compression runs in parallel
#' outside of R. Otherwise the file is written through a `gzfile()`
#' connection, which honours `compression_level` as well.
#'
#' @param object R object to serialize.
#' @param file Path of the file to write. Any other value (e.g. a connection)
#'   is passed straight to `base::saveRDS()`.
#' @param threads Number of pigz threads; invalid or `NA` values (which
#'   `parallel::detectCores()` can return) fall back to 1.
#' @param compression_level Compression level: 0-9, or 11 (pigz only; capped
#'   at 9 when pigz is not available).
#' @return `NULL`, invisibly.
saveRDS.gz <-
  function(object, file, threads = parallel::detectCores(), compression_level = 6) {
    level <- suppressWarnings(as.integer(compression_level))
    if (length(level) != 1L || is.na(level) || !(level %in% c(0:9, 11L))) {
      stop("'compression_level' must be one of 0-9 or 11", call. = FALSE)
    }

    is_path <- is.character(file) && length(file) == 1L && !is.na(file)
    pigz <- unname(Sys.which("pigz"))

    if (is_path && nzchar(pigz)) {
      n_threads <- suppressWarnings(as.integer(threads)[1L])
      if (is.na(n_threads) || n_threads < 1L) {
        n_threads <- 1L
      }
      # shQuote() keeps spaces, quotes and `$` in the path from being
      # interpreted by the shell; path.expand() is needed because a quoted
      # `~` is not expanded by the shell.
      cmd <- paste0(
        shQuote(pigz), " -c -p", n_threads, " -", level,
        " > ", shQuote(path.expand(file))
      )
      writeRDS(object, pipe(cmd, "wb"))
    } else if (is_path) {
      # No pigz: write through gzfile() so the requested level still applies
      # (gzfile supports levels 0-9 only).
      con <- gzfile(path.expand(file), open = "wb",
                    compression = min(level, 9L))
      on.exit(close(con), add = TRUE)
      base::saveRDS(object, file = con)
    } else {
      base::saveRDS(
        object,
        file = file,
        compress = "gzip"
      )
    }
  }
#' Read a gzip-compressed RDS file, decompressing with pigz when available.
#'
#' When `file` is a single path and the `pigz` program is found, the file is
#' decompressed by `pigz` and the resulting stream is read through a pipe.
#' Otherwise the call is delegated to `base::readRDS()`.
#'
#' @param file Path of the file to read. Any other value (e.g. a connection)
#'   is passed straight to `base::readRDS()`.
#' @param threads Number of pigz threads; invalid or `NA` values (which
#'   `parallel::detectCores()` can return) fall back to 1.
#' @return The deserialized R object.
readRDS.gz <-
  function(file, threads = parallel::detectCores()) {
    is_path <- is.character(file) && length(file) == 1L && !is.na(file)
    if (!is_path) {
      return(base::readRDS(file))
    }

    path <- path.expand(file)
    # Fail early with a clear message; through the pipe a missing file would
    # only show up as an obscure unserialize failure.
    if (!file.exists(path)) {
      stop("file does not exist: ", path, call. = FALSE)
    }

    pigz <- unname(Sys.which("pigz"))
    if (!nzchar(pigz)) {
      return(base::readRDS(path))
    }

    n_threads <- suppressWarnings(as.integer(threads)[1L])
    if (is.na(n_threads) || n_threads < 1L) {
      n_threads <- 1L
    }
    # shQuote() keeps spaces, quotes and `$` in the path from being
    # interpreted by the shell.
    cmd <- paste0(shQuote(pigz), " -dc -p", n_threads, " ", shQuote(path))
    loadRDS(pipe(cmd, "rb"))
  }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment