# Gist jalapic/67df6f97996a2bfa9e6997f2792c36c6 -- created April 15, 2019 22:41.
# Note from the GitHub page: this file contains hidden or bidirectional Unicode
# text that may be interpreted or compiled differently than what appears below.
# To review, open the file in an editor that reveals hidden Unicode characters.
library(tidyverse)
library(magrittr)
library(rvest)

# Download the Wikipedia page and parse every "wikitable" into a data frame.
webpage <- read_html("https://en.wikipedia.org/wiki/List_of_tournament_performances_by_Tiger_Woods")
l <- webpage %>%
  html_nodes("table.wikitable") %>%
  html_table()

# keep ones where first column is 'Tournament'
# identical() returns FALSE (never NA) for a table without column names, so the
# logical index below cannot inject NULL entries into the list.
is_season_table <- vapply(
  l,
  function(x) identical(colnames(x)[1], "Tournament"),
  logical(1)
)
l <- l[is_season_table]

# 1992 is first one in list
# no tournaments in 2016
season_years <- c(1992:2015, 2017:2019)
# The year labels are assigned by position, so fail loudly if the page no
# longer has exactly one table per expected season.
if (length(l) != length(season_years)) {
  stop(
    "Expected ", length(season_years), " season tables but found ", length(l),
    "; update the hard-coded year range.",
    call. = FALSE
  )
}
names(l) <- season_years

# add year
l <- Map(cbind, l, year = names(l))

# bind together (columns are matched by position)
df <- data.table::rbindlist(l)

# colnames
colnames(df) <- c("tournament", "r1", "r2", "r3", "r4", "score", "par", "place", "money", "year")
# remove matchplays (their round-1 cell reads "see below" instead of a score)
df <- df[df$r1 != "see below", ]

# remove stableford event
# NOTE: these are positional row indices into the scraped table. Rows are
# dropped one at a time, so each index refers to the table as it stands after
# the previous removal. Re-check them if the source page changes.
df <- df[-60, ]
df <- df[-78, ]

# remove withdrawn event
df <- df[-258, ]

# add event number (overall, in table order)
df <- df %>% mutate(event = row_number())

# Las Vegas Invitational has five rounds: its fourth-round cell holds "r4/r5".
# fill = "right" sets r5 to NA for every four-round event without emitting a
# "missing pieces" warning for each of them.
df <- df %>% separate("r4", into = c("r4", "r5"), sep = "/", fill = "right")
# add grouping variable
# place1 is the numeric finishing position: the tie marker "T" is stripped
# ("T5" -> 5) and any non-numeric placing such as "CUT" becomes NA. The
# coercion warning is expected for those entries, so it is silenced here only.
df$place1 <- suppressWarnings(
  as.numeric(as.character(gsub("T", "", df$place)))
)

# Classify each event by finish. case_when() takes the first matching branch
# and treats NA conditions as non-matching, so anything without a numeric
# top-10 placing that is not a missed cut falls through to "made cut".
df$group <- case_when(
  df$place == "CUT" ~ "cut",
  df$place1 == 1 ~ "first",
  df$place1 <= 5 ~ "top5",
  df$place1 <= 10 ~ "top10",
  TRUE ~ "made cut"
)
table(df$group)
# Per-round score columns, selected by name rather than by position.
round_cols <- c("r1", "r2", "r3", "r4", "r5")

# make sure columns are numeric, then gather data into one row per round played
# (rounds without a score are dropped). After sorting by event and round label,
# `round` is overwritten with a sequential index across all events, which is
# what the plots use as the x axis.
df <- df %>%
  as_tibble() %>%
  mutate(across(all_of(round_cols), as.numeric)) %>%
  pivot_longer(
    all_of(round_cols),
    names_to = "round",
    values_to = "value",
    values_drop_na = TRUE
  ) %>%
  arrange(event, round) %>%
  mutate(round = row_number())

# mark final round: the last recorded round of each event
df <- df %>%
  group_by(event) %>%
  mutate(finalrd = if_else(row_number() == n(), "yes", "no")) %>%
  ungroup()
# Colours are named by level so the mapping stays fixed even if a level is
# missing from the data being plotted.
group_colours <- c(
  "cut" = "blue",
  "first" = "red",
  "made cut" = "lightsalmon",
  "top10" = "sienna1",
  "top5" = "orangered"
)

# just highlight last rounds by group - 5 groups
ggplot() +
  geom_point(
    aes(x = round, y = value),
    data = df %>% filter(finalrd == "no"),
    alpha = .9, color = "gray75", size = 3
  ) +
  geom_point(
    aes(x = round, y = value, color = group),
    data = df %>% filter(finalrd == "yes"),
    size = 3
  ) +
  scale_color_manual(values = group_colours) +
  theme_minimal()

# Collapse the five finish groups to three: wins, missed cuts, everything else.
df$group1 <- if_else(df$group %in% c("first", "cut"), df$group, "made cut")

group1_colours <- c("cut" = "blue", "first" = "red", "made cut" = "gray88")

# highlight all rounds but 3 groups
ggplot() +
  geom_point(
    aes(x = round, y = value, color = group1),
    data = df,
    alpha = .9, size = 3
  ) +
  scale_color_manual(values = group1_colours) +
  theme_minimal()

# just highlight last rounds by group - 3 groups
ggplot() +
  geom_point(
    aes(x = round, y = value),
    data = df %>% filter(finalrd == "no"),
    alpha = .9, color = "gray75", size = 3
  ) +
  geom_point(
    aes(x = round, y = value, color = group1),
    data = df %>% filter(finalrd == "yes"),
    size = 3
  ) +
  scale_color_manual(values = group1_colours) +
  theme_minimal()
# Identify Majors
table(df$tournament)
majors <- c(
  "PGA Championship", "U.S. Open", "Open Championship",
  "The Open Championship", "Masters Tournament"
)
df$major <- if_else(df$tournament %in% majors, "major", "pga")

# group2 is group1 with major wins split out as their own level.
df$group2 <- if_else(
  df$major == "major" & df$group1 == "first",
  "major-first",
  df$group1
)
table(df$group2)

# just highlight last rounds by group - 4 groups
# Base layers: every non-final round in grey, then final rounds coloured by
# group1. Major wins are left out here and drawn separately on top in p1.
# Colours are named by level so the mapping does not depend on level order.
p <- ggplot() +
  geom_point(
    aes(x = round, y = value),
    data = df %>% filter(finalrd == "no"),
    alpha = .9, color = "gray80", size = 3
  ) +
  geom_point(
    aes(x = round, y = value, color = group1),
    data = df %>% filter(finalrd == "yes", group2 != "major-first"),
    size = 3
  ) +
  scale_color_manual(
    values = c("cut" = "blue", "first" = "red", "made cut" = "gray70")
  ) +
  theme_minimal()

# Final rounds of major wins: red fill with a thick black outline.
p1 <- p +
  geom_point(
    aes(x = round, y = value),
    data = df %>% filter(finalrd == "yes", group2 == "major-first"),
    shape = 21,
    color = "black",
    fill = "red",
    stroke = 2,
    size = 2
  )
p1
# add rolling average score, last 20 rounds...
# Right-aligned window: the first 19 rounds have no average and are NA.
df$ravg <- zoo::rollmeanr(df$value, k = 20, fill = NA)
p2 <- p1 + geom_line(aes(x = round, y = ravg), data = df, lwd = 1)

## add labels and fix grid lines
# Scores outside 60-90 fall outside the axis limits and are not drawn.
p3 <- p2 +
  scale_y_continuous(
    minor_breaks = NULL,
    breaks = seq(60, 90, 5),
    limits = c(60, 90)
  ) +
  xlab("Round Number") +
  ylab("Score") +
  ggtitle("Tiger Woods PGA Tour Scoring by Round")
p3

# get years: the first row of each year (df is in round order), used to place
# one marker per year
yearsdf <- df %>%
  group_by(year) %>%
  slice(1) %>%
  ungroup()

# Dashed line at the start of every year; the 2017 label is skipped
# (presumably to avoid overlapping its neighbour -- confirm).
p4 <- p3 +
  geom_vline(
    data = yearsdf,
    aes(xintercept = round),
    lty = 2, color = "gray67"
  ) +
  geom_text(
    data = yearsdf %>% filter(year != 2017),
    mapping = aes(x = round, label = year, y = 87),
    angle = 90, hjust = 0, size = 5
  )
p4

# Round index that carries the "Turned Pro" marker. Hand-picked value;
# re-check it if the scraped data changes.
pro_debut_round <- 38
p5 <- p4 +
  geom_vline(xintercept = pro_debut_round, lty = 2, color = "pink") +
  annotate(
    "text",
    x = pro_debut_round, y = 87,
    label = "Turned Pro",
    color = "pink", angle = 90, hjust = 0, size = 5
  )

# axis text size
p6 <- p5 + theme(
  axis.title = element_text(size = 20),
  plot.title = element_text(size = 20),
  axis.text = element_text(size = 15)
)
# create an svg image
library(svglite)
svglite("plot.svg", width = 30, height = 10)
# print() explicitly: a bare `p6` is only auto-printed at an interactive or
# Rscript top level, so under source() nothing would be drawn to the device.
print(p6)
dev.off()

# Also save a PNG copy.
ggsave("tigerplot.png", plot = p6, width = 25, height = 8)
# (GitHub page footer: sign up or sign in to comment on this gist.)