Created
January 30, 2023 04:32
-
-
Save Tadge-Analytics/bd02fa0178651a201fabefa4843b82d6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################### | |
# Folder and file locations used by the sections of this script.
# file.path() joins the pieces with "/" on every platform.
folder_path <- "diamond csv storage"
csv_file_path <- file.path(folder_path, "diamonds.csv")
xlsx_file_path <- file.path(folder_path, "diamonds.xlsx")
################################################################### | |
# Running for the first time, this section creates the csv + excel files.
# The `if (FALSE)` guard keeps it from executing when the script is sourced;
# run the body by hand (or flip the guard) to write the fixtures.
if (FALSE) {
  library(tidyverse)

  # Only create the folder when it is missing.
  if (!dir.exists(folder_path)) {
    dir.create(folder_path)
  }

  # Write the `diamonds` dataset out once as csv ...
  diamonds %>%
    write_csv(csv_file_path)

  # ... and once as an Excel workbook formatted as a table.
  diamonds %>%
    openxlsx::write.xlsx(xlsx_file_path, asTable = TRUE)
}
################################################################### | |
# Progress reporting setup, taken from:
# https://www.jottr.org/2023/01/10/progressr-0.13.0/
# Enable progressr handlers globally and tell cli to report its progress
# bars through progressr.
progressr::handlers(global = TRUE)
options(cli.progress_handlers = "progressr")
################################################################### | |
# Benchmark: read the same csv repeatedly with readr::read_csv().
# for 300 files 13.64 sec elapsed
if (FALSE) {
  library(tidyverse)

  how_many_files <- 300

  tictoc::tic()

  # One row per import; every row points at the same csv file.
  import_of_files <-
    tibble(row_id = seq_len(how_many_files), file_path = csv_file_path) %>%
    mutate(
      imported_data = map(
        file_path,
        # Silence read_csv's own output so only map()'s progress bar shows.
        ~ read_csv(.x, show_col_types = FALSE, progress = FALSE),
        .progress = TRUE
      )
    )

  tictoc::toc()
}
################################################################### | |
# Benchmark: read the same csv repeatedly with vroom::vroom().
# for 300 files 4.96 sec elapsed
if (FALSE) {
  library(tidyverse)

  how_many_files <- 300

  tictoc::tic()

  # One row per import; every row points at the same csv file.
  import_of_files <-
    tibble(row_id = seq_len(how_many_files), file_path = csv_file_path) %>%
    mutate(
      imported_data = map(
        file_path,
        # Silence vroom's own output so only map()'s progress bar shows.
        ~ vroom::vroom(.x, show_col_types = FALSE, progress = FALSE),
        .progress = TRUE
      )
    )

  tictoc::toc()
}
################################################################### | |
# Benchmark: read the same xlsx repeatedly, sequentially, with readxl.
# for 100 files 61.35 sec elapsed
if (FALSE) {
  library(tidyverse)

  how_many_files <- 100

  tictoc::tic()

  # One row per import; every row points at the same xlsx file.
  import_of_files <-
    tibble(row_id = seq_len(how_many_files), file_path = xlsx_file_path) %>%
    mutate(
      imported_data = map(
        file_path,
        # progress = FALSE matches the other read_excel sections in this
        # script, so readxl's own progress bar does not compete with map()'s.
        ~ readxl::read_excel(.x, progress = FALSE),
        .progress = TRUE
      )
    )

  tictoc::toc()
}
################################################################### | |
# with future map
# Benchmark: read the same xlsx repeatedly in parallel with furrr.
# for 100 files 19.87 sec elapsed
if (FALSE) {
  library(tidyverse)
  library(furrr) # this also loads the future package

  # availableCores() counts logical cores unless called with
  # `logical = FALSE`. Per the future docs, multisession already defaults to
  # availableCores() workers, so this is explicit rather than required.
  all_cores <- parallelly::availableCores()

  # plan() returns the previous plan; keep it so it can be restored below.
  old_plan <- future::plan(future::multisession, workers = all_cores)

  how_many_files <- 100

  tictoc::tic()

  # One row per import; every row points at the same xlsx file.
  import_of_files <-
    tibble(row_id = seq_len(how_many_files), file_path = xlsx_file_path) %>%
    mutate(
      imported_data = future_map(
        file_path,
        ~ readxl::read_excel(.x, progress = FALSE),
        .progress = TRUE
      )
    )

  tictoc::toc()

  # Restore the earlier plan so the background R sessions are shut down.
  future::plan(old_plan)
}
################################################################### | |
# Benchmark: read the same xlsx repeatedly in parallel with foreach + doSNOW.
# for 100 files 20.33 sec elapsed
if (FALSE) {
  library(tidyverse)
  library(doSNOW)
  library(doParallel)

  # all_cores <- parallel::detectCores(logical = FALSE)
  all_cores <- parallel::detectCores()
  cl <- parallel::makePSOCKcluster(all_cores)
  doSNOW::registerDoSNOW(cl)

  how_many_files <- 100

  # Text progress bar, advanced through the `.options.snow` progress callback.
  pb <- txtProgressBar(max = how_many_files, style = 3)
  progress <- function(n) setTxtProgressBar(pb, n)
  opts <- list(progress = progress)

  tictoc::tic()

  # Release the cluster and progress bar even if an import fails.
  tryCatch(
    {
      # The loop body only calls readxl via `::`, so no packages need to be
      # attached on the workers.
      result_of_loop <-
        foreach(
          file_id = seq_len(how_many_files),
          .options.snow = opts
        ) %dopar% {
          readxl::read_excel(xlsx_file_path, progress = FALSE)
        }

      # Stack the per-file tibbles into one data frame.
      import_of_files <- result_of_loop %>% list_rbind()

      tictoc::toc()
    },
    finally = {
      parallel::stopCluster(cl)
      close(pb)
    }
  )
}
################################################################### | |
# Benchmark: read the same xlsx repeatedly in parallel with foreach + doFuture.
# for 100 files 18.78 sec elapsed
if (FALSE) {
  library(tidyverse)
  library(doFuture)

  # Make %dopar% run on whatever future plan is active.
  doFuture::registerDoFuture()

  # plan() returns the previous plan; keep it so it can be restored below.
  old_plan <- future::plan(future::multisession)

  how_many_files <- 100

  tictoc::tic()

  result_of_loop <-
    foreach(file_id = seq_len(how_many_files)) %dopar% {
      readxl::read_excel(xlsx_file_path, progress = FALSE)
    }

  # Stack the per-file tibbles into one data frame.
  import_of_files <- result_of_loop %>% list_rbind()

  tictoc::toc()

  # Restore the earlier plan so the background R sessions are shut down.
  future::plan(old_plan)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment