Last active
December 17, 2015 16:00
-
-
Save abresler/60849bf96ec462dfeb29 to your computer and use it in GitHub Desktop.
NBA 2014-15 Heatmap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
getBREFTeamStatTable <- | |
function(season_end = 2015, table_name = 'team', date = T) { | |
packages <- | |
c('rvest','dplyr','pipeR','RCurl', 'XML','reshape2', 'tidyr', 'magrittr') | |
lapply(packages, library, character.only = T) | |
base <- | |
'http://www.basketball-reference.com/leagues/' | |
season <- | |
(season_end - 1) %>% | |
paste0("-",season_end) | |
league <- | |
'NBA' | |
table_name %<>% | |
tolower | |
css_page <- | |
'#' %>% paste0(table_name) | |
table <- | |
table_name %>% | |
paste('stats', sep = "_") | |
css_id <- | |
css_page %>% | |
paste0(" a") | |
url <- | |
base %>% | |
paste0(league,'_',season_end,".html") | |
if (table_name == 'standings') { | |
t <- | |
url %>% | |
html %>% | |
html_table(fill = T) | |
if (season_end >= 1971) { | |
df <- | |
t[2] %>% data.frame %>% | |
tbl_df | |
names(df) <- | |
c('team','wins','losses','win_pct','games_back','pts.g','opp_pts.g','srs') | |
df$conference <- | |
'Eastern' | |
df2 <- | |
t[3] %>% | |
data.frame %>% | |
tbl_df | |
names(df2) <- | |
c('team','wins','losses','win_pct','games_back','pts.g','opp_pts.g','srs') | |
df2$conference <- 'Western' | |
df <- | |
bind_rows(df,df2) | |
df$playoff_team <- | |
df$team %>% | |
grepl('\\*',.) | |
df$team %<>% | |
gsub('\\*','',.) | |
conference_rank <- | |
df$team %>% | |
colsplit('\\(',c('team','conference_rank')) %>% | |
.$conference_rank | |
df$conference_rank <- | |
conference_rank %>% | |
gsub('\\)','',.) %>% | |
as.numeric | |
df$team <- | |
df$team %>% | |
colsplit('\\(',c('team','conference_rank')) %>% | |
.$team %>% | |
Trim | |
} else{ | |
df <- | |
t[2] %>% | |
data.frame %>% | |
tbl_df | |
names(df) <- | |
c('team','wins','losses','win_pct','games_back','pts.g','opp_pts.g','srs') | |
div <- | |
df$team %>% grep('Western Division',.) %>% | |
as.numeric | |
df$conference <- | |
'' | |
df[1:div,'conference'] <- | |
'Eastern' | |
df[div:nrow(df),'conference'] <- | |
'Western' | |
if (grep('\\(',df$team) %>% length > 0) { | |
conference_rank <- | |
df$team %>% | |
colsplit('\\(',c('team','conference_rank')) %>% | |
.$conference_rank | |
df$conference_rank <- | |
conference_rank %>% gsub('\\)','',.) %>% | |
as.numeric | |
df$team <- | |
df$team %>% | |
colsplit('\\(',c('team','conference_rank')) %>% | |
.$team %>% | |
Trim | |
} | |
df$playoff_team <- | |
df$team %>% | |
grepl('\\*',.) | |
df$team %<>% | |
gsub('\\*','',.) | |
} | |
df[df$games_back == '—','games_back'] <- | |
0 | |
df$games_back %<>% | |
as.numeric | |
df %<>% | |
dplyr::filter(team == 'Baltimore Bullets' | !is.na(wins)) | |
df$point_differential <- | |
df$pts.g - df$opp_pts.g | |
team_url_stems <- | |
url %>% | |
html %>% | |
xpathSApply(path = '//*[(@id = "all_standings")]//a',xmlAttrs) %>% | |
unlist | |
names(team_url_stems) <- | |
NULL | |
bref_team_id <- | |
team_url_stems %>% | |
gsub('.html|\\/teams|\\/','',.) %>% | |
gsub(pattern = "[^[:alpha:]]", replacement = "", .) | |
df <- | |
data.frame(season,table_name = 'standings', bref_team_id, df) %>% | |
tbl_df | |
df$bref_team_season_url <- | |
'http://www.basketball-reference.com' %>% | |
paste0(team_url_stems) | |
} | |
else{ | |
df <- | |
url %>% ## get table | |
html %>% | |
html_nodes(css_page) %>% | |
html_table(header = F) %>% | |
data.frame %>% | |
tbl_df | |
if(df$X1[1] == 'Rk') { | |
names <- | |
df %>% | |
dplyr::filter(X1 == "Rk") %>% | |
as.character | |
row_of_header <- | |
'Rk' %>% | |
grep(x = df$X1) #find where rank is | |
start <- | |
row_of_header + 1 | |
df <- | |
df[start:nrow(df),] | |
names %<>% | |
tolower | |
names(df) <- | |
names | |
} | |
else{ | |
names <- | |
df %>% | |
dplyr::filter(X1 == "Rk") %>% | |
as.character | |
row_of_header <- | |
'Rk' %>% | |
grep(x = df$X1) | |
start <- | |
row_of_header + 1 | |
df <- | |
df[start:nrow(df),] | |
names %<>% | |
tolower | |
names(df) <- | |
names | |
} | |
names(df) %<>% | |
gsub('\\%|/','\\.',.) | |
df$rk <- | |
NULL | |
table_name_character <- | |
c('team','arena') | |
df[,!(df %>>% names) %in% table_name_character] %>>% apply(2, function(x) | |
gsub('\\,','',x) %>>% | |
as.numeric(x)) -> df[,!(df %>>% names) %in% table_name_character] #get rid of commas and make numeric | |
df$playoff_team <- | |
df$team %>% grepl(pattern = '\\*') | |
df$team %<>% | |
gsub('\\*','',.) | |
rows <- | |
df %>% | |
nrow - 1 | |
df <- | |
df[1:rows,] | |
xpath <- | |
paste0("//*[(@id = '",table_name,"')]//a") | |
##Grab Team Ids | |
stems <- | |
url %>% ## get table | |
html %>% | |
xpathSApply(xpath,xmlAttrs) %>% | |
unlist %>% | |
as.character | |
bref_team_id <- | |
stems %>% | |
gsub('\\/|.html|teams','',.) %>% | |
gsub(season_end,'',.) | |
df <- | |
data.frame(season,table_name = table, bref_team_id, df) %>% | |
tbl_df | |
df$bref_team_season_url <- | |
'http://www.basketball-reference.com' %>% | |
paste0(stems) | |
} | |
if (date == T) { | |
df$scrape_time <- | |
Sys.time() | |
} | |
return(df) | |
} | |
library(d3heatmap) | |
team2015 <- | |
getBREFTeamStatTable(season_end = 2015, table_name = 'team') | |
team2015 %<>% | |
arrange(team) | |
data <- | |
team2015 %>% | |
select(fg:pts.g) %>% | |
select(-fg.,-X3p., -X2p., -pts.g, -ft.) | |
rownames(data) <- | |
team2015$team | |
mp <- | |
team2015$mp | |
data.per240 <- | |
data %>% | |
apply(2, function(x) x / mp * 48 * 5) | |
row.names(data.per240) <- | |
team2015$team | |
unclustered <- | |
data.per240 %>% | |
d3heatmap(colors = "RdYlBu", theme = "dark",cluster = F) | |
clustered <- | |
data.per240 %>% | |
d3heatmap(colors = "RdYlBu", theme = "dark") | |
clustered | |
unclustered | |
library(DT) | |
data.per240 <- | |
data.per240[,colnames(data.per240)[c(17,1:6,12,11:9, 13:14, 7:8,16)]] | |
table <- | |
data.per240 %>% | |
datatable(colnames = | |
colnames(data.per240), rownames = TRUE, filter = 'top', | |
options = list(pageLength = 30, lengthMenu = c(5, 10, 15, 20, 25, 30), dom = 'Rlfrtip',colReorder = list(realtime = TRUE), initComplete = JS( | |
"function(settings, json) {", | |
"$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});", | |
"}")), | |
caption = htmltools::tags$caption( | |
style = 'caption-side: bottom; text-align: center;', | |
htmltools::em('Team Per 240M Stats')), | |
escape = FALSE, extensions = c('ColReorder', 'Responsive') | |
) %>% formatRound(3,columns = 1:30) | |
table |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment