Skip to content

Instantly share code, notes, and snippets.

@Tadge-Analytics
Created January 30, 2023 04:32
Show Gist options
  • Save Tadge-Analytics/bd02fa0178651a201fabefa4843b82d6 to your computer and use it in GitHub Desktop.
Save Tadge-Analytics/bd02fa0178651a201fabefa4843b82d6 to your computer and use it in GitHub Desktop.
###################################################################
# Locations of the benchmark input files. Both files live in one folder,
# given relative to the current working directory.
folder_path <- "diamond csv storage"
# file.path() joins components with "/" on every platform, so the resulting
# strings are the same as the previous paste0() construction.
csv_file_path <- file.path(folder_path, "diamonds.csv")
xlsx_file_path <- file.path(folder_path, "diamonds.xlsx")
###################################################################
# One-off setup: when running for the first time, flip the guard to TRUE to
# create the storage folder and write the `diamonds` data set to both the CSV
# and the Excel file that the benchmarks below read.
if (FALSE) {
  library(tidyverse)

  if (!dir.exists(folder_path)) {
    dir.create(folder_path)
  }

  write_csv(diamonds, csv_file_path)
  # TRUE rather than T: `T` is an ordinary variable that can be reassigned
  openxlsx::write.xlsx(diamonds, xlsx_file_path, asTable = TRUE)
}
###################################################################
# Progress-reporting setup, taken from:
# https://www.jottr.org/2023/01/10/progressr-0.13.0/
# Turn on progressr's handlers globally, i.e. for every call made later in
# this session rather than only inside a with_progress() wrapper.
progressr::handlers(global = TRUE)
# Tell cli to hand its progress bars over to progressr.
# NOTE(review): presumably this is what routes the `.progress = TRUE` bars of
# the map() calls below through progressr - confirm against the linked post.
options(cli.progress_handlers = "progressr")
###################################################################
# Benchmark: sequential CSV import with read_csv(), reading the same file
# `how_many_files` times through map().
# Recorded timing: for 300 files 13.64 sec elapsed
if (FALSE) {
  library(tidyverse)

  how_many_files <- 300

  tictoc::tic()
  import_of_files <-
    # seq_len() stays correct (empty) if how_many_files is ever set to 0
    tibble(row_id = seq_len(how_many_files), file_path = csv_file_path) %>%
    mutate(
      imported_data = map(
        file_path,
        # the reader's own progress output is off; map() reports instead
        ~ read_csv(.x, show_col_types = FALSE, progress = FALSE),
        .progress = TRUE
      )
    )
  tictoc::toc()
}
###################################################################
# Benchmark: sequential CSV import with vroom::vroom(), reading the same file
# `how_many_files` times through map().
# Recorded timing: for 300 files 4.96 sec elapsed
if (FALSE) {
  library(tidyverse)

  how_many_files <- 300

  tictoc::tic()
  import_of_files <-
    # seq_len() stays correct (empty) if how_many_files is ever set to 0
    tibble(row_id = seq_len(how_many_files), file_path = csv_file_path) %>%
    mutate(
      imported_data = map(
        file_path,
        # the reader's own progress output is off; map() reports instead
        ~ vroom::vroom(.x, show_col_types = FALSE, progress = FALSE),
        .progress = TRUE
      )
    )
  tictoc::toc()
}
###################################################################
# Benchmark: sequential Excel import with readxl::read_excel(), reading the
# same file `how_many_files` times through map().
# Recorded timing: for 100 files 61.35 sec elapsed
# (that figure was measured before `progress = FALSE` was passed to
# read_excel(); re-run to refresh it)
if (FALSE) {
  library(tidyverse)

  how_many_files <- 100

  tictoc::tic()
  import_of_files <-
    # seq_len() stays correct (empty) if how_many_files is ever set to 0
    tibble(row_id = seq_len(how_many_files), file_path = xlsx_file_path) %>%
    mutate(
      imported_data = map(
        file_path,
        # progress = FALSE matches the parallel Excel benchmarks in this
        # file, so all of them are timed with the same reader settings
        ~ readxl::read_excel(.x, progress = FALSE),
        .progress = TRUE
      )
    )
  tictoc::toc()
}
###################################################################
# Benchmark: parallel Excel import with furrr::future_map() on a multisession
# plan (one background R session per worker).
# Recorded timing: for 100 files 19.87 sec elapsed
if (FALSE) {
  library(tidyverse)
  library(furrr) # this also loads the future package

  # availableCores() counts logical CPUs (hyper-threads included), whereas
  # availableCores(logical = FALSE) asks for physical cores only, where the
  # platform can report them. plan(multisession) already defaults to
  # workers = availableCores(), so this line is not strictly required; it is
  # kept to make the worker count explicit and easy to change.
  all_cores <- parallelly::availableCores()

  # plan() returns the previously active plan, which is restored below
  previous_plan <- future::plan(future::multisession, workers = all_cores)

  how_many_files <- 100

  tictoc::tic()
  import_of_files <-
    # seq_len() stays correct (empty) if how_many_files is ever set to 0
    tibble(row_id = seq_len(how_many_files), file_path = xlsx_file_path) %>%
    mutate(
      imported_data = future_map(
        file_path,
        ~ readxl::read_excel(.x, progress = FALSE),
        .progress = TRUE
      )
    )
  tictoc::toc()

  # Switch back to the earlier plan so the background workers started for
  # this benchmark are shut down instead of lingering for the whole session.
  future::plan(previous_plan)
}
###################################################################
# Benchmark: parallel Excel import with foreach + doSNOW on a PSOCK cluster,
# with a text progress bar driven through doSNOW's `progress` option.
# Recorded timing: for 100 files 20.33 sec elapsed
if (FALSE) {
  library(tidyverse)
  library(doSNOW)
  library(doParallel)

  # all_cores <- parallel::detectCores(logical = FALSE)
  all_cores <- parallel::detectCores()
  cl <- parallel::makePSOCKcluster(all_cores)
  doSNOW::registerDoSNOW(cl)

  how_many_files <- 100

  # doSNOW calls `progress(n)` as tasks complete; forward that to the bar
  pb <- txtProgressBar(max = how_many_files, style = 3)
  progress <- function(n) setTxtProgressBar(pb, n)
  opts <- list(progress = progress)

  # `finally` guarantees the cluster and the progress bar are released even
  # when a worker fails; cleanup stays outside the tic()/toc() window, as it
  # was before.
  tryCatch(
    {
      tictoc::tic()
      result_of_loop <-
        foreach(
          # seq_len() stays correct (empty) if how_many_files is set to 0
          file_id = seq_len(how_many_files),
          .options.snow = opts,
          # NOTE(review): read_excel() is namespace-qualified below, so
          # attaching tidyverse on every worker looks unnecessary - confirm
          # before removing, since it affects the measured time.
          .packages = c("tidyverse")
        ) %dopar% {
          readxl::read_excel(xlsx_file_path, progress = FALSE)
        }
      import_of_files <- result_of_loop %>% list_rbind()
      tictoc::toc()
    },
    finally = {
      # explicit namespace: the cluster was created by parallel, and both
      # snow and parallel export a stopCluster()
      parallel::stopCluster(cl)
      close(pb)
    }
  )
}
###################################################################
# Benchmark: parallel Excel import with foreach + doFuture on a multisession
# plan (no progress bar in this variant).
# Recorded timing: for 100 files 18.78 sec elapsed
if (FALSE) {
  library(tidyverse)
  library(doFuture)

  doFuture::registerDoFuture()
  # plan() returns the previously active plan, which is restored below
  previous_plan <- future::plan(future::multisession)

  how_many_files <- 100

  tictoc::tic()
  result_of_loop <-
    # seq_len() stays correct (empty) if how_many_files is ever set to 0
    foreach(file_id = seq_len(how_many_files)) %dopar% {
      readxl::read_excel(xlsx_file_path, progress = FALSE)
    }
  import_of_files <- result_of_loop %>% list_rbind()
  tictoc::toc()

  # Switch back to the earlier plan so the background workers started for
  # this benchmark are shut down instead of lingering for the whole session.
  future::plan(previous_plan)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment