Created
June 23, 2011 00:55
-
-
Save Btibert3/1041659 to your computer and use it in GitHub Desktop.
2011 NHL D Statistics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---- Setup ----
# Switch to the author's project folder only when it actually exists.
# The original unconditional setwd() stopped the script with an error on any
# machine that lacks this exact path; with the guard the script simply keeps
# running from the current working directory.
project_dir <- "/My Dropbox/Projects/NHL Defensemen 2011 performance"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# load the libraries I commonly use
library(XML)
library(plyr)
library(lubridate)
library(ggplot2)

# ---- Grab the data ----
# readHTMLTable() parses the tables found on the page; the element named
# "stats" is the one used for the rest of the analysis.
URL <- "http://www.hockey-reference.com/leagues/NHL_2011_skaters.html"
tables <- readHTMLTable(URL)$stats

# `$stats` yields NULL when no table of that name was parsed; fail here with
# a clear message instead of an obscure error further down.
if (is.null(tables)) {
  stop("No table named 'stats' was found at ", URL, call. = FALSE)
}
head(tables)
# Keep only the rows whose position code is "D" and report how many remain
is_defenseman <- tables$Pos == 'D'
ds <- tables[is_defenseman, ]
nrow(ds)

# Show the column types as scraped, then coerce the columns by position
str(ds)

# Columns 1, 3 and 6-19 become numeric. Each goes through as.character()
# first: as.numeric() applied directly to a factor returns its internal
# level codes rather than the displayed values.
numeric_cols <- c(1, 3, 6:19)
ds[numeric_cols] <- lapply(
  ds[numeric_cols],
  function(column) as.numeric(as.character(column))
)

# Columns 2, 4, 5 and 20 become plain character strings
text_cols <- c(2, 4:5, 20)
ds[text_cols] <- lapply(ds[text_cols], as.character)
# Cut on games played to a "core" set of players -- roughly the upper 50%
summary(ds$GP)
hist(ds$GP, xlab="Games played", main="Distribution of games played")

# Keep the players at or above the median number of games played.
# GP may contain NA (e.g. from a failed numeric conversion). Without
# na.rm = TRUE a single NA makes the median NA, and an NA comparison inside
# the row subset turns the result into all-NA rows. Rows with a missing GP
# are dropped explicitly.
gp_cutoff <- median(ds$GP, na.rm = TRUE)
ds <- ds[!is.na(ds$GP) & ds$GP >= gp_cutoff, ]
# Look at the distribution of +/-
# Column 11 gets the syntactic name "plusmin" so it can be read as ds$plusmin.
# NOTE(review): assumes column 11 of the scraped table is the +/- column --
# confirm against names(ds) if the page layout changes.
names(ds)[11] <- "plusmin"
summary(ds$plusmin)
hist(ds$plusmin)

# Sort the data frame from highest to lowest +/-.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned elsewhere in a session.
sorted.pm <- ds[order(ds$plusmin, decreasing = TRUE), ]

# top 25
head(sorted.pm, n = 25)
# Plot +/- against time on ice.
# ds$TOI is passed as x and ds$plusmin as y, so the axis labels must say so;
# the previous labels ("+/-" on x, "Points" on y) did not match the data.
plot(ds$TOI, ds$plusmin, xlab="Time on ice (TOI)", ylab="+/-", pch=20, cex=.8)

# Sort the data frame from most to least time on ice
# (TRUE rather than the reassignable shorthand `T`).
sorted.toi <- ds[order(ds$TOI, decreasing = TRUE), ]

# top 25
head(sorted.toi, n = 25)

# Zee was top on +/- and top 3 in TOI..... +/- is not the best stat, but
# coupled with TOI, it's a start IMO
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment