Created
July 1, 2016 07:14
-
-
Save HughParsonage/ec598ebe30b73cd56c651219d5bec834 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest) | |
library(dplyr) | |
library(tidyr) | |
library(xml2) | |
library(data.table) | |
# Sportsbet "outrights" pages for two of the territories, kept as standalone
# URLs. The trailing number is the same group id stored in `xpaths` below.
url_act_electorates_outrights <- "http://www.sportsbet.com.au/betting/politics/australian-federal-politics/outrights?ev_oc_grp_id=1971028"
url_nt <- "http://www.sportsbet.com.au/betting/politics/australian-federal-politics/outrights?ev_oc_grp_id=1971052"

# Group id per state/territory. Despite the name these are not XPath strings:
# each id is appended to the outrights URL and also embedded in the
# "accordion-body-<id>" element id when a state's page is scraped.
xpaths <- list(
  ACT = 1971028,
  NT  = 1971052,
  NSW = 1956097,
  QLD = 1971071,
  SA  = 1971079,
  TAS = 1971095,
  VIC = 1971122,
  WA  = 1971123
)
#' Scrape Sportsbet's per-division odds for one state or territory.
#'
#' Downloads the "outrights" page for the group id stored at position `i` of
#' the global `xpaths` list, cuts the accordion text into one chunk per
#' division heading, keeps the entries that end in decimal odds, and reshapes
#' them into long format.
#'
#' @param i Integer position in `xpaths`. `names(xpaths)[i]` is also used to
#'   strip the state abbreviation from the scraped division headings.
#' @return A data.table with one row per division and listed entry:
#'   `rowNumber` (position of the entry within its division, as character),
#'   `Division`, `char` (the raw scraped text), `Party` and `Odds`.
#'   An error is raised when the page has no accordion list or no division
#'   headings.
state_odds_table <- function(i) {
  state_url <- paste0(
    "http://www.sportsbet.com.au/betting/politics/australian-federal-politics/outrights?ev_oc_grp_id=",
    xpaths[[i]]
  )

  # Fetch and parse the page once. The text of the accordion list is the only
  # part of the document the rest of the function needs.
  accordion_text <-
    state_url %>%
    read_html() %>%
    html_nodes(xpath = '//*[@id="accordion"]/ul') %>%
    xml_text()

  if (length(accordion_text) == 0L) {
    stop("No accordion list found at ", state_url, call. = FALSE)
  }

  # A division heading looks like "hh:mm <text> Markets (n)" followed by
  # newlines; the same pattern both splits the text and names the pieces.
  heading_pattern <- "[0-9][0-9]:[0-9][0-9][^0-9]+Markets\\s+\\([0-9]\\)\n+"

  # first entry not a division
  tbl_odds_char <- strsplit(accordion_text, split = heading_pattern)[[1]][-1]

  if (length(tbl_odds_char) == 0L) {
    stop("No division headings found at ", state_url, call. = FALSE)
  }

  divisions <-
    accordion_text %>%
    stringr::str_extract_all(pattern = heading_pattern) %>%
    unlist() %>%
    gsub("\n+", "", .) %>%
    gsub("[^A-Za-z]", "", .) %>%
    # La Trobe is 'Vic'.
    gsub(paste0("^([A-Za-z]+)((Vic)|(", names(xpaths)[i], "))Markets$"), "\\1", .)

  # Collapse spelling variants that match GEL+IBRAND onto a single name.
  divisions <- ifelse(grepl("GEL+IBRAND", divisions, ignore.case = TRUE),
                      "GELLIBRAND",
                      divisions)

  # Within a division, entries are separated by runs of three or more newlines.
  odds_list <- stringr::str_split(tbl_odds_char, pattern = "\n{3,}")
  names(odds_list) <- divisions

  include_only_party_indices <- function(x) {
    x[grepl("\\.[0-9][0-9][0-9]*$", x)] # need to include 1.001
  }
  odds_list <- lapply(odds_list, include_only_party_indices)

  # Pad every division to the same length so the vectors can be bound as
  # columns; the NA padding is dropped again after reshaping.
  max_rows <- max(vapply(odds_list, length, integer(1)))
  pad_to_max_rows <- function(x) {
    length(x) <- max_rows
    x
  }

  cbind_asis <- function(...) {
    cbind.data.frame(..., stringsAsFactors = FALSE)
  }

  odds_df <-
    do.call(cbind_asis, lapply(odds_list, pad_to_max_rows)) %>%
    as.data.frame(., stringsAsFactors = FALSE) %>%
    mutate(rowNumber = as.character(rownames(.))) %>%
    gather(Division, char, -rowNumber, na.rm = TRUE) %>%
    filter(complete.cases(.)) %>%
    mutate(Party = stringr::str_extract(char, pattern = "((Labor)|(Xenophon)|(Liberal)|(Coalition)|(National)|(Green)|(Other)|(Independent))"),
           # The odds are whatever follows the last run of newlines.
           Odds = as.numeric(gsub("^.*\n+(.*)$", "\\1", char))) %>%
    # Liberal and National entries are both reported as the Coalition.
    mutate(Party = ifelse(Party %in% c("National", "Liberal"), "Coalition", Party))

  as.data.table(odds_df)
}
# Scrape every state/territory listed in `xpaths` and stack the results.
national_odds <-
  lapply(seq_along(xpaths), state_odds_table) %>%
  rbindlist(use.names = TRUE, fill = TRUE)

# Plain assignment rather than `%<>%`: that operator is exported by magrittr,
# which none of the library() calls in this script attach, so the compound
# assignment pipe cannot be found at run time.
national_odds <-
  national_odds %>%
  group_by(Division) %>%
  arrange(Odds) %>%
  # Probability: inverse odds rescaled to sum to one within each division.
  # Favourite: the first party of the division after sorting by odds.
  mutate(Probability = (1 / Odds) / sum(1 / Odds),
         Favourite = first(Party)) %>%
  mutate(Time = Sys.time()) %>%
  group_by(Division, Party) %>%
  # Probability2: total probability per party within a division (several
  # entries can share a party once Liberal/National are merged upstream).
  mutate(
    Probability2 = sum(Probability, na.rm = TRUE)
  ) %>%
  # this will exclude some where the odds are even
  ungroup()
# One row per division: the entry that was listed first on the scraped page
# (rowNumber 1), without the raw text column, plus an upper-case copy of the
# division name in ELECT_DIV2.
national_favourites <-
  national_odds %>%
  select(-char) %>%
  group_by(Division) %>%
  filter(rowNumber == 1) %>%
  ungroup() %>%
  mutate(ELECT_DIV2 = toupper(Division))
# Write both tables to data/, prefixed with the current time (colons removed).
# NOTE(review): the full odds table goes to "...national_favourites.csv" and
# the favourites table to "...the_national_favourites.csv"; names kept as-is.
#
# fwrite(national_odds, file.path = paste0("data/", gsub(":", "", Sys.time(), fixed = TRUE), "national_favourites.csv"))

# format() pins the stamp to "YYYY-MM-DD HH:MM:SS". Passing Sys.time() straight
# to gsub() goes through as.character(), which can include fractional seconds
# on R >= 4.3. Taking the time once also keeps both files on the same stamp.
output_stamp <- gsub(":", "", format(Sys.time()), fixed = TRUE)

# write_csv() does not create missing directories.
if (!dir.exists("data")) {
  dir.create("data", recursive = TRUE)
}

# The target is passed positionally: the second argument is called `path` in
# older readr releases and `file` in newer ones.
readr::write_csv(national_odds,
                 file.path("data", paste0(output_stamp, "national_favourites.csv")))
readr::write_csv(national_favourites,
                 file.path("data", paste0(output_stamp, "the_national_favourites.csv")))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment