-
-
Save Sandy4321/974e8d1faaaa1912da2b to your computer and use it in GitHub Desktop.
Interactive NBA Kohonen Viz
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| nba_kohonen_viz <- function(season.end = 2015, team.totals = T, min_games = 5, scale_per_minute = T, xdim = 12, ydim = 12, clusters = 12, show.ids = T){ | |
| get_bref_player_season_stats <- function(season.end, stat_type = c("Advanced","Totals","Per Minute","Per Game"), team.totals = F , league = 'NBA'){ | |
| packages <- | |
| c('rvest','magrittr','dplyr','stringr','tidyr') | |
| lapply(packages, library, character.only = T) | |
| bref_team_base <- | |
| 'http://www.basketball-reference.com/leagues/' | |
| bref_base <- | |
| 'http://www.basketball-reference.com' | |
| stat_type <- | |
| match.arg(stat_type,choices = c("Advanced",'Totals','Per Minute',"Per Game")) | |
| stat_type %<>% | |
| tolower %>% gsub("\\ ",'_',.) | |
| url <- | |
| paste0(bref_team_base,league,'_',season.end,'_',stat_type,'.html') | |
| css_page <- | |
| paste0('#',stat_type) | |
| css_player <- | |
| 'td:nth-child(2) a' | |
| page <- | |
| url %>% | |
| read_html | |
| tables <- | |
| page %>% | |
| html_table(fill = T) | |
| data <- | |
| tables[1] %>% | |
| data.frame %>% | |
| tbl_df | |
| url.player <- | |
| page %>% | |
| html_nodes(css_player) %>% | |
| html_attr('href') %>% | |
| paste0('http://www.basketball-reference.com/',.) | |
| names(data) %<>% | |
| tolower | |
| data %<>% | |
| dplyr::filter(!rk == 'Rk') | |
| data %<>% | |
| dplyr::select(-rk) %>% | |
| rename(id.team = tm, id.pos = pos) | |
| if(team.totals == T){ | |
| multi.teams <- | |
| data %>% | |
| dplyr::filter(id.team == "TOT") | |
| one.team <- | |
| data %>% | |
| dplyr::filter(!player %in% multi.teams$player) | |
| data <- | |
| bind_rows(multi.teams,one.team) %>% | |
| arrange(player) | |
| } else{ | |
| data %<>% | |
| dplyr::filter(!id.team == 'TOT') | |
| } | |
| numerics <- | |
| data %>% | |
| dplyr::select(-player, -id.pos, -id.team) %>% | |
| names | |
| data[,numerics] %<>% | |
| apply(2, as.numeric) | |
| season.start <- | |
| season.end - 1 | |
| id.season <- | |
| season.start %>% | |
| paste0('-',season.end %>% substr(3,4)) | |
| data %<>% | |
| mutate(season.end, | |
| id.season, | |
| id.pos = data$id.pos %>% substr(1,2), | |
| id.name.table = stat_type, | |
| url.season = url) | |
| if(c('var.20') %in% names(data)){ | |
| data %<>% | |
| dplyr::select(-var.20, -var.25) | |
| } | |
| return(data) | |
| } | |
| data <- | |
| get_bref_player_season_stats(season.end = season.end, stat_type = 'Totals') | |
| packages <- | |
| c('kohonen', 'igraph', 'visNetwork', 'htmlwidgets', 'materializeR') | |
| lapply(packages, library, character.only = T) | |
| min_games <- | |
| 5 | |
| data %<>% | |
| dplyr::filter(g >= min_games) %>% | |
| tbl_df | |
| set.seed(7) | |
| som_grid <- | |
| somgrid(xdim = xdim, ydim = ydim, topo = "hexagonal") | |
| kohonen.data <- | |
| data %>% | |
| dplyr::select(fg, fga, x3p, x3pa, x2p, x2pa, ft, fta, orb, trb, ast:pts) | |
| if(scale_per_minute == T){ | |
| kohonen.data %<>% | |
| apply(2, function(x) x / data$mp) | |
| } | |
| kohonen.matrix <- | |
| kohonen.data %>% | |
| as.matrix %>% | |
| scale | |
| names(kohonen.matrix) <- | |
| data %>% | |
| dplyr::select(fg, fga, x3p, x3pa, x2p, x2pa, ft, fta, orb, trb, ast:pts) %>% names | |
| som_model_nba <- | |
| kohonen.matrix %>% | |
| som( | |
| grid = som_grid, | |
| rlen = 100, # could be 100 | |
| alpha = c(.05,.01), | |
| keep.data = T, | |
| n.hood = 'circular' | |
| ) | |
| som_cluster <- | |
| som_model_nba$codes %>% | |
| dist %>% | |
| hclust %>% | |
| cutree(clusters) | |
| cluster.player <- | |
| som_cluster[som_model_nba$unit.classif] | |
| group.player <- | |
| som_model_nba$unit.classif | |
| data$cluster.player <- | |
| cluster.player | |
| data$group.player <- | |
| group.player | |
| plot.data <- | |
| data %>% | |
| dplyr::select(cluster.player, group.player, player:url.season, id.pos) %>% | |
| dplyr::select(player, cluster.player, group.player, id.pos, id.team) | |
| players.nodes <- | |
| plot.data %>% | |
| dplyr::select(label = player, group = id.pos) %>% | |
| mutate(id = paste0("P",1:nrow(.))) %>% | |
| dplyr::select(id, label, group) | |
| group.nodes <- | |
| plot.data %>% | |
| dplyr::select(group.player) %>% | |
| distinct() %>% | |
| arrange(group.player) %>% | |
| mutate(label = paste('Group',group.player), | |
| id = paste0('G', 1:nrow(.))) %>% | |
| dplyr::select(id, label) | |
| cluster.nodes <- | |
| plot.data %>% | |
| dplyr::select(cluster.player) %>% | |
| distinct() %>% | |
| arrange(cluster.player) %>% | |
| mutate(label = paste('Cluster',cluster.player), | |
| id = paste0('C', 1:nrow(.))) %>% | |
| dplyr::select(id, label) | |
| nodes <- | |
| cluster.nodes %>% | |
| bind_rows(group.nodes, players.nodes) | |
| id.clusters <- | |
| plot.data %>% | |
| mutate(label = paste("Cluster", cluster.player)) %>% | |
| dplyr::select(label, cluster.player) %>% | |
| distinct() %>% | |
| left_join(nodes) %>% | |
| rename(id.cluster = id) | |
| id.groups <- | |
| plot.data %>% | |
| mutate(label = paste("Group", group.player)) %>% | |
| dplyr::select(label, group.player) %>% | |
| distinct() %>% | |
| left_join(nodes) %>% | |
| rename(id.group = id) | |
| id.players <- | |
| plot.data %>% | |
| dplyr::select(label = player, player, group = id.pos) %>% | |
| dplyr::select(label, group) %>% | |
| left_join(nodes) %>% | |
| rename(id.player = id) | |
| cluster.group.edges <- | |
| plot.data %>% | |
| dplyr::select(cluster.player, group.player) %>% | |
| distinct() %>% | |
| left_join(id.clusters) | |
| cluster.group.edges %<>% | |
| dplyr::select(group.player) %>% | |
| left_join(id.groups) %>% | |
| dplyr::select(-label,-group) %>% | |
| left_join(cluster.group.edges) %>% | |
| dplyr::select(from = id.cluster, to = id.group) | |
| players.edges <- | |
| plot.data %>% | |
| dplyr::select(label = player,group.player) %>% | |
| left_join(id.players) %>% | |
| dplyr::select(group.player, to = id.player) | |
| players.edges %<>% | |
| left_join(id.groups) %>% | |
| dplyr::select(from = id.group, to) | |
| edges <- | |
| bind_rows(players.edges,cluster.group.edges) | |
| if(show.ids == T){ | |
| kohonen.group <- | |
| nodes %>% | |
| visNetwork(edges, width = '100%') %>% | |
| visOptions(navigation = T , nodesIdSelection = TRUE) | |
| } else{ | |
| kohonen.group <- | |
| nodes %>% | |
| visNetwork(edges, width = '100%') %>% | |
| visOptions(navigation = T) | |
| } | |
| return(kohonen.group) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment