-
-
Save Sandy4321/149452597866bc07d8a9 to your computer and use it in GitHub Desktop.
NBA 2014-15 Heatmap
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| getBREFTeamStatTable <- | |
| function(season_end = 2015, table_name = 'team', date = T) { | |
| packages <- | |
| c('rvest','dplyr','pipeR','RCurl', 'XML','reshape2', 'tidyr', 'magrittr') | |
| lapply(packages, library, character.only = T) | |
| base <- | |
| 'http://www.basketball-reference.com/leagues/' | |
| season <- | |
| (season_end - 1) %>% | |
| paste0("-",season_end) | |
| league <- | |
| 'NBA' | |
| table_name %<>% | |
| tolower | |
| css_page <- | |
| '#' %>% paste0(table_name) | |
| table <- | |
| table_name %>% | |
| paste('stats', sep = "_") | |
| css_id <- | |
| css_page %>% | |
| paste0(" a") | |
| url <- | |
| base %>% | |
| paste0(league,'_',season_end,".html") | |
| if (table_name == 'standings') { | |
| t <- | |
| url %>% | |
| html %>% | |
| html_table(fill = T) | |
| if (season_end >= 1971) { | |
| df <- | |
| t[2] %>% data.frame %>% | |
| tbl_df | |
| names(df) <- | |
| c('team','wins','losses','win_pct','games_back','pts.g','opp_pts.g','srs') | |
| df$conference <- | |
| 'Eastern' | |
| df2 <- | |
| t[3] %>% | |
| data.frame %>% | |
| tbl_df | |
| names(df2) <- | |
| c('team','wins','losses','win_pct','games_back','pts.g','opp_pts.g','srs') | |
| df2$conference <- 'Western' | |
| df <- | |
| bind_rows(df,df2) | |
| df$playoff_team <- | |
| df$team %>% | |
| grepl('\\*',.) | |
| df$team %<>% | |
| gsub('\\*','',.) | |
| conference_rank <- | |
| df$team %>% | |
| colsplit('\\(',c('team','conference_rank')) %>% | |
| .$conference_rank | |
| df$conference_rank <- | |
| conference_rank %>% | |
| gsub('\\)','',.) %>% | |
| as.numeric | |
| df$team <- | |
| df$team %>% | |
| colsplit('\\(',c('team','conference_rank')) %>% | |
| .$team %>% | |
| Trim | |
| } else{ | |
| df <- | |
| t[2] %>% | |
| data.frame %>% | |
| tbl_df | |
| names(df) <- | |
| c('team','wins','losses','win_pct','games_back','pts.g','opp_pts.g','srs') | |
| div <- | |
| df$team %>% grep('Western Division',.) %>% | |
| as.numeric | |
| df$conference <- | |
| '' | |
| df[1:div,'conference'] <- | |
| 'Eastern' | |
| df[div:nrow(df),'conference'] <- | |
| 'Western' | |
| if (grep('\\(',df$team) %>% length > 0) { | |
| conference_rank <- | |
| df$team %>% | |
| colsplit('\\(',c('team','conference_rank')) %>% | |
| .$conference_rank | |
| df$conference_rank <- | |
| conference_rank %>% gsub('\\)','',.) %>% | |
| as.numeric | |
| df$team <- | |
| df$team %>% | |
| colsplit('\\(',c('team','conference_rank')) %>% | |
| .$team %>% | |
| Trim | |
| } | |
| df$playoff_team <- | |
| df$team %>% | |
| grepl('\\*',.) | |
| df$team %<>% | |
| gsub('\\*','',.) | |
| } | |
| df[df$games_back == '—','games_back'] <- | |
| 0 | |
| df$games_back %<>% | |
| as.numeric | |
| df %<>% | |
| dplyr::filter(team == 'Baltimore Bullets' | !is.na(wins)) | |
| df$point_differential <- | |
| df$pts.g - df$opp_pts.g | |
| team_url_stems <- | |
| url %>% | |
| html %>% | |
| xpathSApply(path = '//*[(@id = "all_standings")]//a',xmlAttrs) %>% | |
| unlist | |
| names(team_url_stems) <- | |
| NULL | |
| bref_team_id <- | |
| team_url_stems %>% | |
| gsub('.html|\\/teams|\\/','',.) %>% | |
| gsub(pattern = "[^[:alpha:]]", replacement = "", .) | |
| df <- | |
| data.frame(season,table_name = 'standings', bref_team_id, df) %>% | |
| tbl_df | |
| df$bref_team_season_url <- | |
| 'http://www.basketball-reference.com' %>% | |
| paste0(team_url_stems) | |
| } | |
| else{ | |
| df <- | |
| url %>% ## get table | |
| html %>% | |
| html_nodes(css_page) %>% | |
| html_table(header = F) %>% | |
| data.frame %>% | |
| tbl_df | |
| if(df$X1[1] == 'Rk') { | |
| names <- | |
| df %>% | |
| dplyr::filter(X1 == "Rk") %>% | |
| as.character | |
| row_of_header <- | |
| 'Rk' %>% | |
| grep(x = df$X1) #find where rank is | |
| start <- | |
| row_of_header + 1 | |
| df <- | |
| df[start:nrow(df),] | |
| names %<>% | |
| tolower | |
| names(df) <- | |
| names | |
| } | |
| else{ | |
| names <- | |
| df %>% | |
| dplyr::filter(X1 == "Rk") %>% | |
| as.character | |
| row_of_header <- | |
| 'Rk' %>% | |
| grep(x = df$X1) | |
| start <- | |
| row_of_header + 1 | |
| df <- | |
| df[start:nrow(df),] | |
| names %<>% | |
| tolower | |
| names(df) <- | |
| names | |
| } | |
| names(df) %<>% | |
| gsub('\\%|/','\\.',.) | |
| df$rk <- | |
| NULL | |
| table_name_character <- | |
| c('team','arena') | |
| df[,!(df %>>% names) %in% table_name_character] %>>% apply(2, function(x) | |
| gsub('\\,','',x) %>>% | |
| as.numeric(x)) -> df[,!(df %>>% names) %in% table_name_character] #get rid of commas and make numeric | |
| df$playoff_team <- | |
| df$team %>% grepl(pattern = '\\*') | |
| df$team %<>% | |
| gsub('\\*','',.) | |
| rows <- | |
| df %>% | |
| nrow - 1 | |
| df <- | |
| df[1:rows,] | |
| xpath <- | |
| paste0("//*[(@id = '",table_name,"')]//a") | |
| ##Grab Team Ids | |
| stems <- | |
| url %>% ## get table | |
| html %>% | |
| xpathSApply(xpath,xmlAttrs) %>% | |
| unlist %>% | |
| as.character | |
| bref_team_id <- | |
| stems %>% | |
| gsub('\\/|.html|teams','',.) %>% | |
| gsub(season_end,'',.) | |
| df <- | |
| data.frame(season,table_name = table, bref_team_id, df) %>% | |
| tbl_df | |
| df$bref_team_season_url <- | |
| 'http://www.basketball-reference.com' %>% | |
| paste0(stems) | |
| } | |
| if (date == T) { | |
| df$scrape_time <- | |
| Sys.time() | |
| } | |
| return(df) | |
| } | |
| library(d3heatmap) | |
| team2015 <- | |
| getBREFTeamStatTable(season_end = 2015, table_name = 'team') | |
| team2015 %<>% | |
| arrange(team) | |
| data <- | |
| team2015 %>% | |
| select(fg:pts.g) %>% | |
| select(-fg.,-X3p., -X2p., -pts.g, -ft.) | |
| rownames(data) <- | |
| team2015$team | |
| mp <- | |
| team2015$mp | |
| data.per240 <- | |
| data %>% | |
| apply(2, function(x) x / mp * 48 * 5) | |
| row.names(data.per240) <- | |
| team2015$team | |
| unclustered <- | |
| data.per240 %>% | |
| d3heatmap(colors = "RdYlBu", theme = "dark",cluster = F) | |
| clustered <- | |
| data.per240 %>% | |
| d3heatmap(colors = "RdYlBu", theme = "dark") | |
| clustered | |
| unclustered | |
| library(DT) | |
| data.per240 <- | |
| data.per240[,colnames(data.per240)[c(17,1:6,12,11:9, 13:14, 7:8,16)]] | |
| table <- | |
| data.per240 %>% | |
| datatable(colnames = | |
| colnames(data.per240), rownames = TRUE, filter = 'top', | |
| options = list(pageLength = 30, lengthMenu = c(5, 10, 15, 20, 25, 30), dom = 'Rlfrtip',colReorder = list(realtime = TRUE), initComplete = JS( | |
| "function(settings, json) {", | |
| "$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});", | |
| "}")), | |
| caption = htmltools::tags$caption( | |
| style = 'caption-side: bottom; text-align: center;', | |
| htmltools::em('Team Per 240M Stats')), | |
| escape = FALSE, extensions = c('ColReorder', 'Responsive') | |
| ) %>% formatRound(3,columns = 1:30) | |
| table |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment