-
-
Save Sandy4321/974e8d1faaaa1912da2b to your computer and use it in GitHub Desktop.
Interactive NBA Kohonen Viz
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
nba_kohonen_viz <- function(season.end = 2015, team.totals = T, min_games = 5, scale_per_minute = T, xdim = 12, ydim = 12, clusters = 12, show.ids = T){ | |
get_bref_player_season_stats <- function(season.end, stat_type = c("Advanced","Totals","Per Minute","Per Game"), team.totals = F , league = 'NBA'){ | |
packages <- | |
c('rvest','magrittr','dplyr','stringr','tidyr') | |
lapply(packages, library, character.only = T) | |
bref_team_base <- | |
'http://www.basketball-reference.com/leagues/' | |
bref_base <- | |
'http://www.basketball-reference.com' | |
stat_type <- | |
match.arg(stat_type,choices = c("Advanced",'Totals','Per Minute',"Per Game")) | |
stat_type %<>% | |
tolower %>% gsub("\\ ",'_',.) | |
url <- | |
paste0(bref_team_base,league,'_',season.end,'_',stat_type,'.html') | |
css_page <- | |
paste0('#',stat_type) | |
css_player <- | |
'td:nth-child(2) a' | |
page <- | |
url %>% | |
read_html | |
tables <- | |
page %>% | |
html_table(fill = T) | |
data <- | |
tables[1] %>% | |
data.frame %>% | |
tbl_df | |
url.player <- | |
page %>% | |
html_nodes(css_player) %>% | |
html_attr('href') %>% | |
paste0('http://www.basketball-reference.com/',.) | |
names(data) %<>% | |
tolower | |
data %<>% | |
dplyr::filter(!rk == 'Rk') | |
data %<>% | |
dplyr::select(-rk) %>% | |
rename(id.team = tm, id.pos = pos) | |
if(team.totals == T){ | |
multi.teams <- | |
data %>% | |
dplyr::filter(id.team == "TOT") | |
one.team <- | |
data %>% | |
dplyr::filter(!player %in% multi.teams$player) | |
data <- | |
bind_rows(multi.teams,one.team) %>% | |
arrange(player) | |
} else{ | |
data %<>% | |
dplyr::filter(!id.team == 'TOT') | |
} | |
numerics <- | |
data %>% | |
dplyr::select(-player, -id.pos, -id.team) %>% | |
names | |
data[,numerics] %<>% | |
apply(2, as.numeric) | |
season.start <- | |
season.end - 1 | |
id.season <- | |
season.start %>% | |
paste0('-',season.end %>% substr(3,4)) | |
data %<>% | |
mutate(season.end, | |
id.season, | |
id.pos = data$id.pos %>% substr(1,2), | |
id.name.table = stat_type, | |
url.season = url) | |
if(c('var.20') %in% names(data)){ | |
data %<>% | |
dplyr::select(-var.20, -var.25) | |
} | |
return(data) | |
} | |
data <- | |
get_bref_player_season_stats(season.end = season.end, stat_type = 'Totals') | |
packages <- | |
c('kohonen', 'igraph', 'visNetwork', 'htmlwidgets', 'materializeR') | |
lapply(packages, library, character.only = T) | |
min_games <- | |
5 | |
data %<>% | |
dplyr::filter(g >= min_games) %>% | |
tbl_df | |
set.seed(7) | |
som_grid <- | |
somgrid(xdim = xdim, ydim = ydim, topo = "hexagonal") | |
kohonen.data <- | |
data %>% | |
dplyr::select(fg, fga, x3p, x3pa, x2p, x2pa, ft, fta, orb, trb, ast:pts) | |
if(scale_per_minute == T){ | |
kohonen.data %<>% | |
apply(2, function(x) x / data$mp) | |
} | |
kohonen.matrix <- | |
kohonen.data %>% | |
as.matrix %>% | |
scale | |
names(kohonen.matrix) <- | |
data %>% | |
dplyr::select(fg, fga, x3p, x3pa, x2p, x2pa, ft, fta, orb, trb, ast:pts) %>% names | |
som_model_nba <- | |
kohonen.matrix %>% | |
som( | |
grid = som_grid, | |
rlen = 100, # could be 100 | |
alpha = c(.05,.01), | |
keep.data = T, | |
n.hood = 'circular' | |
) | |
som_cluster <- | |
som_model_nba$codes %>% | |
dist %>% | |
hclust %>% | |
cutree(clusters) | |
cluster.player <- | |
som_cluster[som_model_nba$unit.classif] | |
group.player <- | |
som_model_nba$unit.classif | |
data$cluster.player <- | |
cluster.player | |
data$group.player <- | |
group.player | |
plot.data <- | |
data %>% | |
dplyr::select(cluster.player, group.player, player:url.season, id.pos) %>% | |
dplyr::select(player, cluster.player, group.player, id.pos, id.team) | |
players.nodes <- | |
plot.data %>% | |
dplyr::select(label = player, group = id.pos) %>% | |
mutate(id = paste0("P",1:nrow(.))) %>% | |
dplyr::select(id, label, group) | |
group.nodes <- | |
plot.data %>% | |
dplyr::select(group.player) %>% | |
distinct() %>% | |
arrange(group.player) %>% | |
mutate(label = paste('Group',group.player), | |
id = paste0('G', 1:nrow(.))) %>% | |
dplyr::select(id, label) | |
cluster.nodes <- | |
plot.data %>% | |
dplyr::select(cluster.player) %>% | |
distinct() %>% | |
arrange(cluster.player) %>% | |
mutate(label = paste('Cluster',cluster.player), | |
id = paste0('C', 1:nrow(.))) %>% | |
dplyr::select(id, label) | |
nodes <- | |
cluster.nodes %>% | |
bind_rows(group.nodes, players.nodes) | |
id.clusters <- | |
plot.data %>% | |
mutate(label = paste("Cluster", cluster.player)) %>% | |
dplyr::select(label, cluster.player) %>% | |
distinct() %>% | |
left_join(nodes) %>% | |
rename(id.cluster = id) | |
id.groups <- | |
plot.data %>% | |
mutate(label = paste("Group", group.player)) %>% | |
dplyr::select(label, group.player) %>% | |
distinct() %>% | |
left_join(nodes) %>% | |
rename(id.group = id) | |
id.players <- | |
plot.data %>% | |
dplyr::select(label = player, player, group = id.pos) %>% | |
dplyr::select(label, group) %>% | |
left_join(nodes) %>% | |
rename(id.player = id) | |
cluster.group.edges <- | |
plot.data %>% | |
dplyr::select(cluster.player, group.player) %>% | |
distinct() %>% | |
left_join(id.clusters) | |
cluster.group.edges %<>% | |
dplyr::select(group.player) %>% | |
left_join(id.groups) %>% | |
dplyr::select(-label,-group) %>% | |
left_join(cluster.group.edges) %>% | |
dplyr::select(from = id.cluster, to = id.group) | |
players.edges <- | |
plot.data %>% | |
dplyr::select(label = player,group.player) %>% | |
left_join(id.players) %>% | |
dplyr::select(group.player, to = id.player) | |
players.edges %<>% | |
left_join(id.groups) %>% | |
dplyr::select(from = id.group, to) | |
edges <- | |
bind_rows(players.edges,cluster.group.edges) | |
if(show.ids == T){ | |
kohonen.group <- | |
nodes %>% | |
visNetwork(edges, width = '100%') %>% | |
visOptions(navigation = T , nodesIdSelection = TRUE) | |
} else{ | |
kohonen.group <- | |
nodes %>% | |
visNetwork(edges, width = '100%') %>% | |
visOptions(navigation = T) | |
} | |
return(kohonen.group) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment