Skip to content

Instantly share code, notes, and snippets.

@Sandy4321
Forked from abresler/nba_kohonen_viz
Created December 18, 2015 15:16
Show Gist options
  • Save Sandy4321/974e8d1faaaa1912da2b to your computer and use it in GitHub Desktop.
Save Sandy4321/974e8d1faaaa1912da2b to your computer and use it in GitHub Desktop.
Interactive NBA Kohonen Viz
nba_kohonen_viz <- function(season.end = 2015, team.totals = T, min_games = 5, scale_per_minute = T, xdim = 12, ydim = 12, clusters = 12, show.ids = T){
get_bref_player_season_stats <- function(season.end, stat_type = c("Advanced","Totals","Per Minute","Per Game"), team.totals = F , league = 'NBA'){
packages <-
c('rvest','magrittr','dplyr','stringr','tidyr')
lapply(packages, library, character.only = T)
bref_team_base <-
'http://www.basketball-reference.com/leagues/'
bref_base <-
'http://www.basketball-reference.com'
stat_type <-
match.arg(stat_type,choices = c("Advanced",'Totals','Per Minute',"Per Game"))
stat_type %<>%
tolower %>% gsub("\\ ",'_',.)
url <-
paste0(bref_team_base,league,'_',season.end,'_',stat_type,'.html')
css_page <-
paste0('#',stat_type)
css_player <-
'td:nth-child(2) a'
page <-
url %>%
read_html
tables <-
page %>%
html_table(fill = T)
data <-
tables[1] %>%
data.frame %>%
tbl_df
url.player <-
page %>%
html_nodes(css_player) %>%
html_attr('href') %>%
paste0('http://www.basketball-reference.com/',.)
names(data) %<>%
tolower
data %<>%
dplyr::filter(!rk == 'Rk')
data %<>%
dplyr::select(-rk) %>%
rename(id.team = tm, id.pos = pos)
if(team.totals == T){
multi.teams <-
data %>%
dplyr::filter(id.team == "TOT")
one.team <-
data %>%
dplyr::filter(!player %in% multi.teams$player)
data <-
bind_rows(multi.teams,one.team) %>%
arrange(player)
} else{
data %<>%
dplyr::filter(!id.team == 'TOT')
}
numerics <-
data %>%
dplyr::select(-player, -id.pos, -id.team) %>%
names
data[,numerics] %<>%
apply(2, as.numeric)
season.start <-
season.end - 1
id.season <-
season.start %>%
paste0('-',season.end %>% substr(3,4))
data %<>%
mutate(season.end,
id.season,
id.pos = data$id.pos %>% substr(1,2),
id.name.table = stat_type,
url.season = url)
if(c('var.20') %in% names(data)){
data %<>%
dplyr::select(-var.20, -var.25)
}
return(data)
}
data <-
get_bref_player_season_stats(season.end = season.end, stat_type = 'Totals')
packages <-
c('kohonen', 'igraph', 'visNetwork', 'htmlwidgets', 'materializeR')
lapply(packages, library, character.only = T)
min_games <-
5
data %<>%
dplyr::filter(g >= min_games) %>%
tbl_df
set.seed(7)
som_grid <-
somgrid(xdim = xdim, ydim = ydim, topo = "hexagonal")
kohonen.data <-
data %>%
dplyr::select(fg, fga, x3p, x3pa, x2p, x2pa, ft, fta, orb, trb, ast:pts)
if(scale_per_minute == T){
kohonen.data %<>%
apply(2, function(x) x / data$mp)
}
kohonen.matrix <-
kohonen.data %>%
as.matrix %>%
scale
names(kohonen.matrix) <-
data %>%
dplyr::select(fg, fga, x3p, x3pa, x2p, x2pa, ft, fta, orb, trb, ast:pts) %>% names
som_model_nba <-
kohonen.matrix %>%
som(
grid = som_grid,
rlen = 100, # could be 100
alpha = c(.05,.01),
keep.data = T,
n.hood = 'circular'
)
som_cluster <-
som_model_nba$codes %>%
dist %>%
hclust %>%
cutree(clusters)
cluster.player <-
som_cluster[som_model_nba$unit.classif]
group.player <-
som_model_nba$unit.classif
data$cluster.player <-
cluster.player
data$group.player <-
group.player
plot.data <-
data %>%
dplyr::select(cluster.player, group.player, player:url.season, id.pos) %>%
dplyr::select(player, cluster.player, group.player, id.pos, id.team)
players.nodes <-
plot.data %>%
dplyr::select(label = player, group = id.pos) %>%
mutate(id = paste0("P",1:nrow(.))) %>%
dplyr::select(id, label, group)
group.nodes <-
plot.data %>%
dplyr::select(group.player) %>%
distinct() %>%
arrange(group.player) %>%
mutate(label = paste('Group',group.player),
id = paste0('G', 1:nrow(.))) %>%
dplyr::select(id, label)
cluster.nodes <-
plot.data %>%
dplyr::select(cluster.player) %>%
distinct() %>%
arrange(cluster.player) %>%
mutate(label = paste('Cluster',cluster.player),
id = paste0('C', 1:nrow(.))) %>%
dplyr::select(id, label)
nodes <-
cluster.nodes %>%
bind_rows(group.nodes, players.nodes)
id.clusters <-
plot.data %>%
mutate(label = paste("Cluster", cluster.player)) %>%
dplyr::select(label, cluster.player) %>%
distinct() %>%
left_join(nodes) %>%
rename(id.cluster = id)
id.groups <-
plot.data %>%
mutate(label = paste("Group", group.player)) %>%
dplyr::select(label, group.player) %>%
distinct() %>%
left_join(nodes) %>%
rename(id.group = id)
id.players <-
plot.data %>%
dplyr::select(label = player, player, group = id.pos) %>%
dplyr::select(label, group) %>%
left_join(nodes) %>%
rename(id.player = id)
cluster.group.edges <-
plot.data %>%
dplyr::select(cluster.player, group.player) %>%
distinct() %>%
left_join(id.clusters)
cluster.group.edges %<>%
dplyr::select(group.player) %>%
left_join(id.groups) %>%
dplyr::select(-label,-group) %>%
left_join(cluster.group.edges) %>%
dplyr::select(from = id.cluster, to = id.group)
players.edges <-
plot.data %>%
dplyr::select(label = player,group.player) %>%
left_join(id.players) %>%
dplyr::select(group.player, to = id.player)
players.edges %<>%
left_join(id.groups) %>%
dplyr::select(from = id.group, to)
edges <-
bind_rows(players.edges,cluster.group.edges)
if(show.ids == T){
kohonen.group <-
nodes %>%
visNetwork(edges, width = '100%') %>%
visOptions(navigation = T , nodesIdSelection = TRUE)
} else{
kohonen.group <-
nodes %>%
visNetwork(edges, width = '100%') %>%
visOptions(navigation = T)
}
return(kohonen.group)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment