Last active
February 13, 2022 22:55
-
-
Save jimjam-slam/b25ab6215c63a0e3f7f70087edf95a9b to your computer and use it in GitHub Desktop.
Download grouped remote files using friendlier group names #rstatstips
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
# Swap the names and values of a named vector, so a lookup written as
# `friendly = "CODE"` can be used the other way round as `CODE = "friendly"`.
# Based on {searchable} and https://stackoverflow.com/a/64931927/3246758
#
# x: a vector in which every element has a non-empty name.
# Returns a character vector holding the names of `x`, named by the values
# of `x`.
# Errors if `x` is unnamed or only partially named: a blank friendly name
# would otherwise slip through silently and break whatever is built from it.
invert <- function(x) {
  friendly <- names(x)
  if (is.null(friendly) || anyNA(friendly) || !all(nzchar(friendly))) {
    stop(
      "`x` must be a vector with a non-empty name for every element.",
      call. = FALSE
    )
  }
  # base equivalent of purrr::set_names(); keeps this helper dependency-free
  stats::setNames(friendly, x)
}
# lookup table for fruit: each friendly name (on the left) labels the coded
# name (on the right)
fruit <- c(apple = "ABCJ001", banana = "ABCJ002", orange = "AKFJ004")
# lookup table for seasons: friendly name = coded name
season <- c(summer = "DJF", autumn = "MAM", winter = "JJA", spring = "SON")
# Build one row for every pairing of a fruit code with a season code
# (tidyr::expand_grid here; stats::expand.grid is fine too!).
# The columns are given `_coded` names straight away so they can't be mixed
# up with the unexpanded `fruit` and `season` vectors when recoding.
combos <-
  expand_grid(fruit_coded = fruit, season_coded = season) %>%
  mutate(
    # translate each code back to its friendly name via the inverted lookups
    fruit_friendly = recode(fruit_coded, !!!invert(fruit)),
    season_friendly = recode(season_coded, !!!invert(season)),
    # remote files are named by code; local copies get the friendly names
    remote_path = paste0(
      "/data/2019/sales/", fruit_coded, "-", season_coded, ".csv"
    ),
    local_path = file.path(
      "data", paste0(fruit_friendly, "-", season_friendly, ".csv")
    )
  )

# print the finished table
combos
# # A tibble: 12 x 6 | |
# fruit_coded season_coded fruit_friendly season_friendly remote_path local_path
# <chr> <chr> <chr> <chr> <chr> <chr> | |
# 1 ABCJ0… DJF apple summer /data/2019/sales/… data/apple-s… | |
# 2 ABCJ0… MAM apple autumn /data/2019/sales/… data/apple-a… | |
# 3 ABCJ0… JJA apple winter /data/2019/sales/… data/apple-w… | |
# 4 ABCJ0… SON apple spring /data/2019/sales/… data/apple-s… | |
# 5 ABCJ0… DJF banana summer /data/2019/sales/… data/banana-… | |
# 6 ABCJ0… MAM banana autumn /data/2019/sales/… data/banana-… | |
# 7 ABCJ0… JJA banana winter /data/2019/sales/… data/banana-… | |
# 8 ABCJ0… SON banana spring /data/2019/sales/… data/banana-… | |
# 9 AKFJ0… DJF orange summer /data/2019/sales/… data/orange-… | |
# 10 AKFJ0… MAM orange autumn /data/2019/sales/… data/orange-… | |
# 11 AKFJ0… JJA orange winter /data/2019/sales/… data/orange-… | |
# 12 AKFJ0… SON orange spring /data/2019/sales/… data/orange-… | |
# show only the two path columns, so the paths print in full:
select(combos, remote_path, local_path)
# # A tibble: 12 x 2 | |
# remote_path local_path | |
# <chr> <chr> | |
# 1 /data/2019/sales/ABCJ001-DJF.csv data/apple-summer.csv | |
# 2 /data/2019/sales/ABCJ001-MAM.csv data/apple-autumn.csv | |
# 3 /data/2019/sales/ABCJ001-JJA.csv data/apple-winter.csv | |
# 4 /data/2019/sales/ABCJ001-SON.csv data/apple-spring.csv | |
# 5 /data/2019/sales/ABCJ002-DJF.csv data/banana-summer.csv | |
# 6 /data/2019/sales/ABCJ002-MAM.csv data/banana-autumn.csv | |
# 7 /data/2019/sales/ABCJ002-JJA.csv data/banana-winter.csv | |
# 8 /data/2019/sales/ABCJ002-SON.csv data/banana-spring.csv | |
# 9 /data/2019/sales/AKFJ004-DJF.csv data/orange-summer.csv | |
# 10 /data/2019/sales/AKFJ004-MAM.csv data/orange-autumn.csv | |
# 11 /data/2019/sales/AKFJ004-JJA.csv data/orange-winter.csv | |
# 12 /data/2019/sales/AKFJ004-SON.csv data/orange-spring.csv | |
# Make sure our target folders exist before we start trying to download files.
# Only folders that are missing get created: dir.create() warns (and returns
# FALSE) when the directory is already there, which would otherwise happen on
# every re-run of this script.
combos %>%
  pull(local_path) %>%
  dirname() %>%
  unique() %>%
  discard(dir.exists) %>%
  # recursive = TRUE also creates any missing parent folders
  walk(dir.create, recursive = TRUE)
# Download every remote file to its friendly-named local path, pairing the
# two columns up row by row.
# (cheap plug: you could do this safely with {purrr} or {collateral})
walk2(combos$remote_path, combos$local_path, download.file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment