Skip to content

Instantly share code, notes, and snippets.

@conjugateprior
Last active August 29, 2015 14:19
Show Gist options
  • Save conjugateprior/8909cb8093e07ed14712 to your computer and use it in GitHub Desktop.
Save conjugateprior/8909cb8093e07ed14712 to your computer and use it in GitHub Desktop.
Scraping the barrel
library(magrittr)
library(stringr)
library(ggplot2)
library(rvest)
#' Scrape MP vote tallies from sexymp.co.uk and put them on a logit scale.
#'
#' Downloads the listing page for one gender, pulls a party label, a name and
#' the "Won"/"Lost" counts out of the results table, and derives a logit
#' score with an approximate +/- 2 standard error interval from the counts.
#'
#' @param sex Either "M" or "F"; sent as the `gender` query parameter.
#'   When omitted, `match.arg()` selects the first choice, "M".
#' @return A data.frame with one row per scraped MP cell and the columns
#'   `party`, `mp`, `pro` (won), `con` (lost), `logit`, `se`, `lower`, `upper`.
#'   Stops with an error when no table cells are found or when the scraped
#'   pieces do not line up one-to-one.
make_data <- function(sex = c("M", "F")) {
  sex <- match.arg(sex)
  addr <- paste0('http://www.sexymp.co.uk/index.php?gender=', sex, '#vote')
  page <- read_html(addr, encoding = "ISO-8859-1")

  cells <- page %>% html_nodes('table[width="700px"] tr td[valign="top"]')
  howmany <- length(cells)
  # seq(2, howmany, by = 2) fails with an obscure "wrong sign in 'by'" error
  # when fewer than two cells come back, so report the real problem instead.
  if (howmany < 2) {
    stop("No MP table cells found at ", addr,
         "; the page layout may have changed", call. = FALSE)
  }

  # Only the even-numbered cells are used for the MP details.
  picks <- seq(2, howmany, by = 2)
  mps <- cells %>% extract(picks)

  # Keeps every second <b> element found inside the MP cells.
  # NOTE(review): presumably each cell holds exactly two <b> elements and the
  # second one is the party -- confirm against the live page.
  parties <- mps %>%
    html_nodes("b") %>%
    extract(picks) %>%
    html_text %>%
    str_trim

  mp_names <- mps %>%
    html_nodes("a") %>%
    html_text %>%
    str_trim

  # data.frame() silently recycles vectors whose lengths divide evenly, which
  # would pair MPs with the wrong party or score; insist on a 1:1 match.
  n_mps <- length(mps)
  if (length(parties) != n_mps || length(mp_names) != n_mps) {
    stop("Scraped ", n_mps, " MP cells but ", length(parties),
         " parties and ", length(mp_names),
         " names; the page layout may have changed", call. = FALSE)
  }

  # Column 1 of the match matrix is the full match; 2 and 3 are the captures.
  sco <- mps %>%
    html_text %>%
    str_match(pattern = "Won: (\\d+) Lost: (\\d+)")

  dd <- data.frame(party = parties, mp = mp_names,
                   pro = as.numeric(sco[, 2]),
                   con = as.numeric(sco[, 3]))

  ## logit scale from Lowe et al. 2011 LSQ
  # A zero count in either column gives an infinite logit and se here.
  dd$logit <- log(dd$pro) - log(dd$con)
  dd$se <- sqrt(1 / dd$pro + 1 / dd$con)
  dd$lower <- dd$logit - dd$se * 2
  dd$upper <- dd$logit + dd$se * 2
  dd
}
# Party label -> hex colour, used as the manual colour scale in the plots.
# Colours taken from
# en.wikipedia.org/wiki/Wikipedia:Index_of_United_Kingdom_political_parties_meta_attributes
pcols <- c(
  "Bi-Curious"                   = "#DDDDDD",  # err wot? (same grey as Independent)
  # larger Westminster parties
  "Conservative"                 = "#0087DC",
  "Democratic Unionist"          = "#D46A4C",
  "Labour"                       = "#DC241f",
  "Labour Co-operative"          = "#CC0000",
  "Liberal Democrat"             = "#FDBB30",
  "Plaid Cymru"                  = "#008142",
  "Scottish National Party"      = "#FFFF00",
  "Sinn Fein"                    = "#008800",
  "Social Democratic and Labour" = "#99FF66",
  # the rest
  "Alliance"                     = "#F6CB2F",
  "Green Party"                  = "#6AB023",
  "Independent"                  = "#DDDDDD"
)
#' Plot MPs ranked by their logit score.
#'
#' Sorts `df` by ascending `logit`, numbers the rows in that order as `rank`,
#' and draws one translucent point per row at (logit, rank), coloured by
#' party.
#'
#' @param df A data.frame with at least the columns `logit` and `party`,
#'   such as the one returned by `make_data()`.
#' @param pcols Named vector of colours keyed by party label; passed as the
#'   `values` of the manual colour scale.
#' @return A ggplot object.
scale_em <- function(df, pcols) {
  sdf <- df[order(df$logit), ] ## sorted
  # seq_len() rather than 1:nrow(): for a 0-row data frame 1:0 is c(1, 0),
  # which cannot be assigned as a column and raises an error.
  sdf$rank <- seq_len(nrow(sdf))
  ggplot(sdf, aes(x = logit, y = rank, colour = party)) +
    geom_point(alpha = 0.5, size = 6) +
    scale_colour_manual(values = pcols) +
    xlab("sex appeal (or something)") +
    ylab("rank")
}
# Scrape both listings: gender=F first, then gender=M (one web request each).
ff <- make_data("F")
mm <- make_data("M")
# Draw one ranked plot per listing. These calls rely on top-level
# auto-printing to display the returned plot; if this file is run via
# source() without echo, wrap each call in print().
scale_em(ff, pcols)
scale_em(mm, pcols)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment