Created
December 23, 2015 10:35
-
-
Save ccagrawal/f1a91bd3a94aafa94f93 to your computer and use it in GitHub Desktop.
Calculate homecourt advantage for each NBA team
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(sportsTools)
library(dplyr)
library(ggplot2)

# Range of seasons to include; each year is passed to GetSchedule() in turn.
start.year <- 2001
end.year <- 2015

# Build one chunk of the regression design matrix per season. Each chunk has:
#   * `<Team>.<year>` columns: +1 for the home team, -1 for the away team
#   * `<Team>.Home` columns:   1 for the team hosting the game
#   * `Outcome`:               schedule$home.margin (presumably the home
#                              team's scoring margin -- confirm in sportsTools)
# Chunks are stored in a preallocated list and bound once after the loop
# rather than growing a data frame on every iteration.
years <- start.year:end.year
season.models <- vector('list', length(years))

for (i in seq_along(years)) {
  year <- years[i]
  schedule <- GetSchedule(year, 'regular')

  # Indicator matrices: one column per team, one row per game
  home.model <- model.matrix(~ home.name + 0, data = schedule)
  away.model <- model.matrix(~ away.name + 0, data = schedule)

  # The subtraction below is positional, so both matrices must list the same
  # teams in the same order; otherwise columns would be silently misaligned.
  stopifnot(identical(
    sub('^home\\.name', '', colnames(home.model)),
    sub('^away\\.name', '', colnames(away.model))
  ))

  # Home teams get a 1, away teams a -1; tag each column with the season
  model <- data.frame(home.model - away.model)
  colnames(model) <- gsub('home.name(.*)', paste0('\\1.', year), colnames(model))

  # Add more columns to represent home court location with a 1. Only the
  # newly bound columns still carry the `home.name` prefix at this point.
  model <- cbind(model, data.frame(home.model))
  colnames(model) <- gsub('home.name(.*)', '\\1.Home', colnames(model))

  # Add outcome column
  model$Outcome <- schedule$home.margin

  season.models[[i]] <- model
}

# Columns absent from a given season are filled with NA by bind_rows()
all.games <- bind_rows(season.models)
# Remove the column named `column` from `games`.
# Selecting by name avoids the `games[, -which(...)]` pitfall: when nothing
# matches, `-integer(0)` selects zero columns and wipes the whole data frame.
# Warns and returns `games` unchanged if the column does not exist.
drop_column <- function(games, column) {
  if (!column %in% colnames(games)) {
    warning('Column not found, nothing removed: ', column, call. = FALSE)
    return(games)
  }
  games[, colnames(games) != column, drop = FALSE]
}

# Fold column `old` into column `new` (element-wise sum) and remove `old`.
# If `old` is absent nothing changes; if only `old` exists it is renamed to
# `new`, so the merged name is used regardless of which seasons were loaded.
fold_column <- function(games, old, new) {
  if (!old %in% colnames(games)) {
    return(games)
  }
  if (!new %in% colnames(games)) {
    colnames(games)[colnames(games) == old] <- new
    return(games)
  }
  games[[new]] <- games[[old]] + games[[new]]
  games[, colnames(games) != old, drop = FALSE]
}

# Replace NA with 0 in the design columns only. An NA there means the column
# did not exist for that row's season. A missing Outcome must not become a
# 0-point margin, so rows without an outcome are dropped instead.
design.cols <- setdiff(colnames(all.games), 'Outcome')
all.games[design.cols][is.na(all.games[design.cols])] <- 0
all.games <- all.games[!is.na(all.games$Outcome), , drop = FALSE]

# Remove NOP / OKC home column. Rows for games hosted by that team then have
# every `.Home` indicator equal to 0.
all.games <- drop_column(all.games, 'New.Orleans.Oklahoma.City.Hornets.Home')

# Combine Charlotte Bobcats and Charlotte Hornets
all.games <- fold_column(
  all.games,
  old = 'Charlotte.Bobcats.Home',
  new = 'Charlotte.Hornets.Home'
)

# Combine New Orleans Hornets and New Orleans Pelicans
all.games <- fold_column(
  all.games,
  old = 'New.Orleans.Hornets.Home',
  new = 'New.Orleans.Pelicans.Home'
)
# Compute regression and save coefficients
fit <- lm(Outcome ~ ., data = all.games)
results <- as.data.frame(summary(fit)$coefficients)
intercept <- results[1, ]

# Keep only the home-court terms (coefficients whose name contains 'Home')
results <- results[grep('Home', row.names(results)), ]
home.terms <- row.names(results)

# Incorporate intercept: each reported estimate is intercept + home coefficient
results$Estimate <- results$Estimate + intercept[1, 1]

# The standard error must describe that same sum, so take it from the
# coefficient covariance matrix:
#   Var(b0 + bh) = Var(b0) + Var(bh) + 2 * Cov(b0, bh)
coef.cov <- vcov(fit)
results$`Std. Error` <- sqrt(
  coef.cov['(Intercept)', '(Intercept)'] +
    diag(coef.cov)[home.terms] +
    2 * coef.cov['(Intercept)', home.terms]
)

# Calculate quantity for each home: the column sum of its indicator, i.e. the
# number of rows in which that `.Home` column is set
results$Sample <- colSums(all.games[, home.terms, drop = FALSE])

# Calculate 90% confidence interval for each home (two-sided, so the 0.95
# quantile of the t distribution with Sample - 1 degrees of freedom)
results$crit.T <- qt(.95, results$Sample - 1)
results <- results[, c('Estimate', 'Std. Error', 'crit.T')]
results$Upper <- results$Estimate + results$`Std. Error` * results$crit.T
results$Lower <- results$Estimate - results$`Std. Error` * results$crit.T

# Clean Up Results. Only Team and Estimate are kept for the output file; the
# interval columns computed above are dropped here.
results$Team <- row.names(results)
row.names(results) <- NULL
results <- results[, c('Team', 'Estimate')]

# Turn column names such as 'Atlanta.Hawks.Home' back into 'Atlanta Hawks'
results$Team <- gsub('\\.', ' ', results$Team)
results$Team <- gsub(' Home', '', results$Team)

# Largest estimate first
results <- results[order(results$Estimate, decreasing = TRUE), ]
write.csv(results, 'hca_results.csv', row.names = FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment