Skip to content

Instantly share code, notes, and snippets.

@srvanderplas
Created January 15, 2019 03:29
Show Gist options
  • Select an option

  • Save srvanderplas/9f4a471e0fb19495935fb2104f3ba9f8 to your computer and use it in GitHub Desktop.

Select an option

Save srvanderplas/9f4a471e0fb19495935fb2104f3ba9f8 to your computer and use it in GitHub Desktop.
Weather from TimeAndDate.com
library(RSelenium)
library(rvest)
library(tidyverse)
# Start a Selenium server plus browser session on port 4567. The server
# version is pinned because newer versions of Selenium don't work that well.
rs_driver <- rsDriver(
  version = "3.12.0",
  port = 4567L
)
# `rd` is the remote-driver client that the rest of the script drives.
rd <- rs_driver[["client"]]
# Implicit wait for element lookups, in milliseconds.
rd$setImplicitWaitTimeout(milliseconds = 5000)
# Build the TimeAndDate.com historic-weather URL for location @4846834
# (presumably Ames, IA, judging by the output file name -- confirm).
#
# m: month number, y: year. Both are formatted with "%d", so they must be
# whole numbers; vectors are recycled by sprintf().
# Returns a character vector of URLs.
url <- function(m, y) {
  historic_page <- "https://www.timeanddate.com/weather/@4846834/historic?month=%d&year=%d"
  sprintf(historic_page, m, y)
}
# Get icon -> text mapping
#
# For each month of one reference year: open the historic page, run the
# onclick handler of every ".weatherLinks" anchor, and after each one record
# the title / icon-number pair of every ".mtt" node in the page. All pairs
# are collected in `tmp` (columns `weather` and `icon`, both character).
y <- 2017

# Return one row per ".mtt" node in the page currently loaded in the browser:
# `weather` is the trimmed title attribute, `icon` the digits following "wt-"
# in the src attribute. The attribute helpers are vectorised, so the whole
# node set is handled in one call.
read_icon_titles <- function() {
  icons <- rd$getPageSource() %>%
    magrittr::extract2(1) %>%
    read_html() %>%
    html_nodes(".mtt")
  tibble(
    weather = str_trim(html_attr(icons, "title")),
    icon = html_attr(icons, "src") %>%
      str_extract("wt-\\d{1,}") %>%
      str_remove("wt-")
  )
}

tmp <- map_df(1:12, function(m) {
  rd$navigate(url(m, y))
  # Click the located element itself; the driver-level click() acts at the
  # current pointer position, not on the element that was just found.
  rd$findElement(using = "css selector", "#wt-his-select")$clickElement()

  handlers <- rd$getPageSource() %>%
    magrittr::extract2(1) %>%
    read_html() %>%
    html_nodes(css = ".weatherLinks a") %>%
    html_attr("onclick")
  # Anchors without an onclick attribute yield NA and are dropped; the
  # leading "return " is removed so the rest can be executed as a script.
  handlers <- handlers[!is.na(handlers)] %>% str_remove("return ")

  map_df(handlers, function(js) {
    rd$executeScript(js)
    read_icon_titles()
  })
})
# Collapse the scraped pairs to one description per icon number: tally each
# (icon, weather) combination, order so the most frequent description of an
# icon comes first, and keep only that first row per icon.
icon_counts <- tmp %>%
  mutate(icon = as.numeric(icon)) %>%
  count(icon, weather)

icon_meaning <- icon_counts %>%
  arrange(icon, desc(n)) %>%
  distinct(icon, .keep_all = TRUE) %>%
  select(-n)
# Scrape the observations listed on every monthly historic page of 2017 and
# 2018 into a pre-built grid holding four rows per calendar day.
#
# NOTE(review): the grid runs to 2019-01-14 but only 2017 and 2018 are
# scraped below, so the January 2019 rows keep their NA placeholders.
weather_data <- tibble(
  date = rep(
    seq.Date(as.Date("2017-01-01"), as.Date("2019-01-14"), by = "day"),
    each = 4
  ),
  time = NA,
  icon = NA,
  wind = NA,
  temp_low = NA,
  temp_high = NA
)
# Columns filled from the scraped pages (everything except `date`).
obs_cols <- c("time", "icon", "wind", "temp_low", "temp_high")

for (m in 1:12) {
  for (y in 2017:2018) {
    rd$navigate(url(m, y))
    sections <- rd$getPageSource() %>%
      magrittr::extract2(1) %>%
      xml2::read_html() %>%
      html_nodes(css = "#weatherContainer") %>%
      html_nodes(css = ".section")

    # The scraped year is appended to the page's date text before parsing.
    # mdy() detects the layout itself and has no format argument: anything
    # passed through `...` is parsed as one more date string.
    obs_date <- sections %>%
      html_nodes(".date") %>%
      html_text() %>%
      sprintf("%s %d", ., y) %>%
      lubridate::mdy(tz = "America/Chicago") %>%
      as.Date()
    # Drop entries that could not be parsed as dates.
    obs_date <- obs_date[!is.na(obs_date)]

    page_obs <- tibble(
      time = sections %>% html_nodes(".time") %>% html_text(),
      icon = sections %>% html_nodes(".wicon") %>% html_attr("data-icon"),
      wind = sections %>% html_nodes(".wstext") %>% html_text(),
      temp_low = sections %>%
        html_nodes(".tempLow") %>%
        html_text() %>%
        str_remove("Lo:") %>%
        parse_number(),
      temp_high = sections %>%
        html_nodes(".temp") %>%
        html_text() %>%
        str_remove("Hi:") %>%
        parse_number()
    )

    # Rows are matched by position: this assumes the page yields exactly four
    # observations for every date it lists, in date order -- TODO confirm.
    weather_data[weather_data$date %in% obs_date, obs_cols] <- page_obs
  }
}
# Attach the description text for each icon number. The join key is spelled
# out so the join neither guesses it nor prints a message about it.
weather_data <- weather_data %>%
  mutate(icon = as.numeric(icon)) %>%
  left_join(icon_meaning, by = "icon")

# Make sure the output directory exists before writing; write_csv() does not
# create missing directories.
out_path <- "data/Ames_weather_2017-2018.csv"
dir.create(dirname(out_path), showWarnings = FALSE, recursive = TRUE)
write_csv(weather_data, out_path)

# Scraping is finished: close the browser session and stop the Selenium
# server so its port is free for the next run.
rd$close()
rs_driver$server$stop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment