Created
July 18, 2015 02:47
-
-
Save jalapic/02521d38c55d57540221 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Getting golf major winners
library(dplyr)
library("rvest")

# Scrape one winners table per golf major from Wikipedia.
#
# Each page is downloaded and parsed with read_html(), the replacement for
# the html() helper that rvest deprecated. The wanted table is selected by
# its position under the node with id "mw-content-text".
# NOTE(review): the table positions are hard-coded, so they presumably need
# re-checking if the layout of a Wikipedia page changes.
#
# Every object created here holds the result of html_table() applied to the
# node set returned by html_nodes().

url <- "http://en.wikipedia.org/wiki/Masters_Tournament"
masters <- url %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[4]') %>%
  html_table()

url1 <- "http://en.wikipedia.org/wiki/U.S._Open_(golf)"
usopen <- url1 %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[2]') %>%
  html_table()

url2 <- "http://en.wikipedia.org/wiki/The_Open_Championship"
theopen <- url2 %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[5]') %>%
  html_table()

# Two tables are taken from the PGA Championship page, so the page is
# downloaded and parsed once and reused for both extractions.
url3 <- "http://en.wikipedia.org/wiki/PGA_Championship"
pga_page <- read_html(url3)
pga1 <- pga_page %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[3]') %>%
  html_table()
# fill = TRUE is spelled out in full: T is an ordinary variable that can be
# reassigned, TRUE is not.
pga2 <- pga_page %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[4]') %>%
  html_table(fill = TRUE)
# Tidy up: look at the raw scrape, then cut every table down to the two
# columns of interest and discard unwanted rows.
str(masters)

# Each scraped object is a list; its first element is the table itself.
# Column selection and row removal happen in a single subsetting call, and
# the negative row indices are positions in the table as scraped.
# NOTE(review): the removed rows are presumably repeated header or separator
# rows in the Wikipedia tables -- confirm against the pages.
masters <- masters[[1]][-71, 1:2]
usopen <- usopen[[1]][-c(71, 95), 1:2]
theopen <- theopen[[1]][-c(1, 71, 92, 136), c(1, 3)]
pga1 <- pga1[[1]][-1, 1:2]
pga2 <- pga2[[1]][-c(15, 40, 41), 1:2]
# Add major: tag every row with the tournament it was scraped from.
masters$major <- "masters"
usopen$major <- "usopen"
theopen$major <- "theopen"
pga1$major <- "pga"
pga2$major <- "pga"

golf <- list(masters, usopen, theopen, pga1, pga2)
library(dplyr)

# Derive a `player` column from the second column of each table by removing
# any parenthesised text together with the spaces around it.
# The column is extracted with [[ ]] rather than [, 2] so that a plain vector
# is obtained whether the table is a base data.frame or a tibble.
golf <- lapply(golf, function(x) {
  x %>% mutate(player = gsub(" *\\(.*?\\) *", "", x[[2]]))
})

# Stack the five tables into one data frame and make the first column
# numeric. [[ ]] is used again so the conversion sees a vector, not a
# one-column table.
golf <- do.call("rbind", golf)
golf[[1]] <- as.numeric(as.character(golf[[1]]))
head(golf)

# Running count of rows per player in Year order: each row contributes
# value = 1 and `total` is the cumulative sum within the player group.
golf <- golf %>%
  group_by(player) %>%
  arrange(Year) %>%
  mutate(value = 1, total = cumsum(value))

# `golf` is still grouped by player here, so max(total) is evaluated per
# player; keep the names of players whose final total exceeds 7.
topgolfers <- golf %>%
  filter(max(total) > 7) %>%
  .$player %>%
  unique()

# Flag the rows of those players (1) versus everyone else (0), and keep an
# ungrouped copy of just the flagged rows for highlighting in the plot.
golf$grp <- ifelse(golf$player %in% topgolfers, 1, 0)
golf1 <- golf %>%
  filter(grp == 1) %>%
  ungroup()
# Not graphing everyone from 0 to 1 wins: that would need zero-win rows
# added to the data frame.
library(ggplot2)

# One gray path per player for the whole data set, with the players held in
# `golf1` drawn again on top as thicker blue paths.
ggplot(golf, aes(x = Year, y = total)) +
  geom_path(aes(group = player), color = "gray55", lwd = 1) +
  geom_path(
    aes(x = Year, y = total, group = player),
    data = golf1,
    color = "dodgerblue",
    lwd = 2
  ) +
  scale_x_continuous(breaks = seq(1860, 2020, by = 20)) +
  ylab("Total Majors") +
  ggtitle("Cumulative Golf Majors by Player") +
  theme(
    # Title and base text
    plot.title = element_text(hjust = 0, vjust = 1, size = rel(3.3)),
    text = element_text(color = "gray20", size = 10),
    # Backgrounds and grid lines
    plot.background = element_blank(),
    panel.background = element_blank(),
    panel.grid.major.x = element_line(color = "gray65"),
    panel.grid.major.y = element_line(color = "gray65"),
    panel.grid.minor = element_blank(),
    # Axis text, titles and ticks
    axis.text = element_text(size = rel(1.0)),
    axis.text.x = element_text(
      color = "gray20", size = rel(2.5), angle = 90, vjust = 1
    ),
    axis.text.y = element_text(color = "gray20", size = rel(2.5)),
    axis.title.x = element_text(size = rel(2.5), vjust = 0),
    axis.title.y = element_text(size = rel(2.5), vjust = 1),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    # No legend
    legend.position = "none"
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment