Last active
September 3, 2018 04:30
-
-
Save tmasjc/9c6b31495eb6c4a1c35693fd36e4c7ec to your computer and use it in GitHub Desktop.
OpenAQ dump collector. #rstats #openaq
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(tidyverse) | |
| library(rvest) | |
# Basic parameters ----
# Root URL that every file name below is appended to.
baseURL <- "https://openaq-data.s3.amazonaws.com/"
# First and last calendar day (both inclusive) to request.
startDate <- as.Date("2015-07-01")
endDate <- as.Date("2018-04-01")

# URL endpoints ----
# One "<YYYY-MM-DD>.csv" name per day in the requested range.
filenames <- sprintf(
  "%s.csv",
  format(seq(startDate, endDate, by = "day"))
)
# Download one file, then pause briefly so consecutive requests are spaced out.
#
# Arguments:
#   ...  forwarded unchanged to download.file() (e.g. url, destfile).
#        Do not pass `quiet`; it is fixed to FALSE here.
# Returns:
#   (invisibly) the integer status reported by download.file().
download.file.new <- function(...) {
  # Register the pause up front so it also runs when the download errors;
  # otherwise a run of failures would hit the server with no delay at all.
  on.exit(Sys.sleep(5), add = TRUE)
  # be verbose
  status <- download.file(..., quiet = FALSE)
  # Hand the status back instead of the NULL that Sys.sleep() would return.
  invisible(status)
}
# Wrap the downloader with purrr's safely() so that one failed download
# does not abort the whole batch.
safely_download <- safely(.f = download.file.new)

# The download target directory must exist before any file is written.
if (!dir.exists("Data/")) {
  dir.create("Data/")
}
# Download a batch of files.
#
# Arguments:
#   url  character vector of file names; each is appended to `baseURL` to
#        form the source address and to "Data/" to form the local path.
# Returns:
#   a list with one safely_download() result per element of `url`.
get_url <- function(url) {
  sources <- paste0(baseURL, url)
  destinations <- paste0("Data/", url)
  map2(sources, destinations, safely_download)
}
# Smoke test: fetch only the first few files.
dl <- filenames %>%
  head() %>%
  get_url()
# Full run (uncomment to download every file):
#dl <- get_url(filenames)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment