Created
May 10, 2017 09:48
-
-
Save fauxneticien/08bd16238d9e2ba4104d0f3f7f6a8178 to your computer and use it in GitHub Desktop.
Download files with names matching pattern from an Alveo list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################## 1. Set up ######################################
# Make sure you have the necessary packages installed (see section 2).

# Id of the Alveo item list: the last number in the list URL,
# e.g. 1045 for https://app.alveo.edu.au/item_lists/1045
alveo_list_id <- 904

# Regex pattern to search for: 'speaker16\\.wav$' means
# 'ending with speaker16.wav'. The dot is escaped so that it matches a
# literal "." rather than any single character.
alveo_search_pattern <- "speaker16\\.wav$"

# Directory to download the wav files into
output_path <- "~/Desktop/"
########################### 2. Packages & config ##############################
# Install packages if you don't have them, e.g.:
# install.packages("pbapply")
library(purrr)
library(pbapply)
library(stringr)

# For alveo grab the latest copy from me (for now)
# devtools::install_github("fauxneticien/alveo-r")
library(alveo)

# Fail early, with a hint, when the config file is missing from the home
# folder.
# NOTE(review): presumably RestClient() reads its credentials from this
# file -- confirm against the alveo package.
if (!file.exists("~/alveo.config")) {
  stop("Do you have alveo.config file in your home folder?", call. = FALSE)
}

# Connect to the Alveo server and fetch the item list chosen in section 1.
client <- RestClient(server_uri = "app.alveo.edu.au")
item_list <- client$get_item_list_by_id(alveo_list_id)
################################ 3. Pipeline ##################################
# Note I've split the pipeline up to give some verbose output message() calls.
# Each stage below shows a progress bar via pblapply().

message(
  "Getting items from list at https://app.alveo.edu.au/item_lists/",
  alveo_list_id
)
# seq_along() (rather than 1:length()) yields an empty sequence for an empty
# list, so no out-of-range item is requested.
items <- pblapply(
  X = seq_along(item_list$items),
  FUN = function(item_index) {
    item_list$get_item(item_index)
  }
)

message("Getting metadata of each item in list...")
# Keep only the `ausnc:document` field of each item's metadata. It is treated
# below as a ", "-separated string of document URIs.
document_strings <- pblapply(
  X = items,
  FUN = function(alveo_doc) {
    metadata <- alveo_doc$get_metadata()
    metadata$`ausnc:document`
  }
)

message(
  "Downloading files matching search string '", alveo_search_pattern,
  "' to ", output_path
)
# Split every document string into individual URIs and keep those matching
# the search pattern. as.character() + unlist() make an item without an
# `ausnc:document` field contribute nothing instead of raising an error.
matches_per_item <- map(document_strings, function(doc_string) {
  uris <- unlist(str_split(string = as.character(doc_string), pattern = ", "))
  uris[grepl(pattern = alveo_search_pattern, x = uris)]
})

# Flatten to one URI per element so that every Document is built from exactly
# one URI, whether an item contributed zero, one or several matches.
matching_uris <- as.character(unlist(matches_per_item, use.names = FALSE))

if (length(matching_uris) == 0) {
  message("No files matched search string '", alveo_search_pattern, "'")
}

# `result` holds whatever each download() call returns, one element per
# matching file.
result <- matching_uris %>%
  map(function(uri) Document(uri = uri, type = "audio/wav")) %>%
  pblapply(FUN = function(alveo_obj) {
    alveo_obj$download(output_path)
  })
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment