Skip to content

Instantly share code, notes, and snippets.

@Btibert3
Created April 1, 2011 23:06
Show Gist options
  • Save Btibert3/899024 to your computer and use it in GitHub Desktop.
Save Btibert3/899024 to your computer and use it in GitHub Desktop.
Player Streakiness in the NHL
## basics
# R 2.12.2
# windows xp; Yes, I know
## libraries
library(XML)
library(plyr)
library(lubridate)
library(ggplot2)
# Set the working directory
# The project path is specific to the original author's machine. Only switch
# to it when it actually exists; otherwise keep running from the current
# directory so the script is usable elsewhere (Horton.Rdata is then written
# to whatever directory the script is run from).
PROJECT_DIR <- "~/My Dropbox/Eclipse/Projects/R/NHL/Blog Posts/Player Streakiness"
if (isTRUE(file.info(path.expand(PROJECT_DIR))$isdir)) {
  setwd(PROJECT_DIR)
} else {
  message("Project directory not found; staying in ", getwd())
}
# Set the constants
# BASE is the game-log URL prefix; a season and a trailing "/" are appended
# to it for each request.
BASE <- "http://www.hockey-reference.com/players/h/hortona01/gamelog/"
# Seasons to download. 2005 is skipped on purpose in the original script --
# presumably because of the 2004-05 NHL lockout; TODO confirm.
SEASON <- c(2004, 2006:2011)
# Loop and grab the data
#
# For every season in SEASON, the page at BASE/<season>/ is downloaded and its
# "stats" table is tidied:
#   * every column is converted to character and the names are lower-cased,
#   * columns 6, 8 and 9 are renamed to AwayHome, WinLoss and goals,
#   * columns 1:2 and 9:19 are converted to numeric,
#   * a `year` column records the season the rows came from.
# The per-season tables are collected in a list and stacked once at the end
# into `ds`, instead of growing `ds` on every iteration.
season_logs <- vector("list", length(SEASON))
for (k in seq_along(SEASON)) {
  S <- SEASON[k]
  URL <- paste(BASE, S, "/", sep = "")
  tables <- readHTMLTable(URL)$stats
  # Fail with a clear message when the page layout is not what we expect,
  # rather than with an obscure indexing error further down.
  if (is.null(tables)) {
    stop("No table named 'stats' found at ", URL, call. = FALSE)
  }
  if (ncol(tables) < 19) {
    stop("Expected at least 19 columns in the 'stats' table at ", URL,
         " but found ", ncol(tables), call. = FALSE)
  }
  # fix factors and names
  tables[] <- lapply(tables, as.character)
  names(tables) <- tolower(colnames(tables))
  # These columns are renamed by position, so this depends on the column
  # order of the scraped table.
  names(tables)[c(6, 8, 9)] <- c("AwayHome", "WinLoss", "goals")
  # fix the columns - "NAs introduced by coercion" warnings are expected for
  # cells that do not hold a number
  numeric_cols <- c(1:2, 9:19)
  tables[numeric_cols] <- lapply(tables[numeric_cols], as.numeric)
  str(tables)
  tables$year <- S
  season_logs[[k]] <- tables
  # BE KIND when scraping: pause between requests (not needed after the last)
  if (k < length(SEASON)) {
    Sys.sleep(10)
  }
}
# rbind.fill() accepts a list of data frames and fills missing columns with NA
ds <- if (length(season_logs) > 0) rbind.fill(season_logs) else data.frame()
# Quick look at the stacked game logs: rows per season, a preview, and size
table(year = ds$year)
head(ds, n = 30)
dim(ds)
# Keep only the rows that have a value in the rk column, then look again
has_rank <- !is.na(ds$rk)
ds <- ds[has_rank, ]
dim(ds)
head(ds, n = 30)
# Store the cleaned data so the scrape does not have to be repeated
save(ds, file = "Horton.Rdata")
# Need to change the date to an actual date in R
# The scraped dates are year-month-day strings separated by "-"; ymd() parses
# them (the older parse_date() helper is not part of lubridate's current API).
str(ds)
ds$date <- ymd(ds$date)
str(ds)
# Format to the month year = do so by setting all with the same arbitrary year
# Set the last months of the season to the year plus 1 so the dates are in
# "order" when plotted: October-December games go into 2011 and
# January-September games into 2012.
# Each date is moved exactly once, and the later year is a leap year, so a
# game played on 29 February keeps a valid date. Moving it into a non-leap
# year would turn it into NA and break the subsetting below.
# which() is used so that an unparseable (NA) date is simply left alone.
game_month <- month(ds$date)
autumn_rows <- which(game_month >= 10)
spring_rows <- which(game_month < 10)
ds$date[autumn_rows] <- update(ds$date[autumn_rows], year = 2011)
ds$date[spring_rows] <- update(ds$date[spring_rows], year = 2012)
head(ds, n = 40)
# Help received from
# http://stackoverflow.com/questions/5494216/extract-date-in-r
# Build a new data frame with a running goal total computed within each year
gamelog <- ddply(ds, "year", transform, cumegoals = cumsum(goals))
# Draw the running total against the date as points joined by a line,
# one panel per year stacked in a single column
ggplot(gamelog, aes(x = date, y = cumegoals)) +
  geom_point() +
  geom_line() +
  facet_wrap(~year, ncol = 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment