Created
June 30, 2021 12:04
-
-
Save bhoung/3833c6afa438296c6c6dfe8964d20da7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| --- | |
| title: Minutes played by Nets, playoffs | |
| layout: post | |
| categories: | |
| - posts | |
| tags: | |
| - nba, sports | |
| output: | |
| #html_document | |
| md_document: | |
| variant: markdown_github+backtick_code_blocks | |
| preserve_yaml: true | |
| toc: false | |
| fig_retina: 2 | |
| --- | |
| ```{r setup, include=FALSE} | |
| # Set the default chunk option echo = TRUE for the document; individual chunks below override it in their headers. | |
| knitr::opts_chunk$set(echo = TRUE) | |
| ``` | |
| ```{r include=FALSE} | |
| library(rvest) | |
| library(cowplot) | |
| library(tidyverse) | |
| library(xml2) | |
| library(stringi) | |
| ``` | |
| ```{r include=FALSE} | |
| # Read the Nets 2021 team page, keep its first table, and collect every attribute of the links found inside tables. | |
| roster <- "https://www.basketball-reference.com/teams/BRK/2021.html" | |
| html <- read_html(roster) | |
| table_nodes <- html_nodes(html, "table") | |
| tables <- html_table(table_nodes) | |
| df <- data.frame(tables[1]) | |
| players <- xml_attrs(xml_find_all(table_nodes, ".//a")) | |
| gamelogs <- unlist(players) | |
| # Distinct attribute values that mention "gamelog". | |
| pgl <- unique(gamelogs[grepl("gamelog", gamelogs)]) | |
| # Hand-picked positions within pgl. NOTE(review): presumably the players who appeared in the playoffs; the indices depend on the page's link order, so verify if the page changes. | |
| playoffs_pl <- pgl[c(1, 2, 3, 4, 6, 9, 10, 14, 12, 15, 18)] | |
| ``` | |
| ```{r include=FALSE} | |
| # Prefix a site path with the basketball-reference host to form a full URL (vectorised over path). | |
| # NOTE(review): a path that already begins with "/" produces "//" after the host; the later 7th-segment split of these URLs appears to rely on that, so the separator is kept exactly as is. | |
| get_player_url <- function(path) { | |
|   paste0("https://www.basketball-reference.com/", path) | |
| } | |
| # One full game-log URL per selected player path. | |
| player_urls <- unlist(lapply(playoffs_pl, get_player_url)) | |
| # Download a game-log page and return one of its tables as a data frame. | |
| # | |
| # Tables are extracted from the raw page text with a regex instead of the parsed DOM, so table markup sitting inside HTML comments is matched as well. | |
| # | |
| # @param player_url URL of the page to download. | |
| # @param table_index Position of the wanted table among the regex matches. The default, 9, is the position the original code used for the playoff table. | |
| # @return A data.frame built from html_table() applied to the selected table. | |
| # Stops with an explicit message when the page yields fewer than table_index tables. | |
| get_data <- function(player_url, table_index = 9) { | |
|   page_lines <- readLines(player_url) | |
|   src_text <- paste0(page_lines, collapse = "") | |
|   table_regex <- as.character(unlist(stri_extract_all(src_text, regex='<table(.*?)/table>', omit_no_match = TRUE, simplify = TRUE))) | |
|   # Fail early with a readable message instead of handing NA to read_html(). | |
|   if (length(table_regex) < table_index || is.na(table_regex[table_index])) { | |
|     stop("Expected at least ", table_index, " tables at ", player_url, " but found ", length(table_regex), call. = FALSE) | |
|   } | |
|   html_parse2 <- read_html(table_regex[table_index]) | |
|   data.frame(html_table(html_parse2)) | |
| } | |
| ``` | |
| ```{r include=FALSE} | |
| # Trial run on the first URL: fetch its table, then add a parsed date column and a minutes column derived from MP ("MM:SS"). | |
| player_url <- player_urls[1] | |
| tdf <- get_data(player_url) %>% | |
|   mutate( | |
|     date = as.Date(Date, "%Y-%m-%d"), | |
|     min = as.difftime(MP, format = "%M:%S", units = "mins") | |
|   ) | |
| ``` | |
| ```{r include=FALSE} | |
| library(stringr) | |
| # Fetch the table behind each URL and store it in a variable named after a piece of that URL; plist records those names in order. | |
| # NOTE(review): the name is the 7th "/"-separated piece of the URL, presumably the player id; confirm this matches the URL layout. | |
| plist <- vector("list", length(player_urls)) | |
| for (idx in seq_along(player_urls)) { | |
|   p <- player_urls[[idx]] | |
|   pname <- str_split(p, "/")[[1]][7] | |
|   plist[[idx]] <- pname | |
|   data <- get_data(p) | |
|   # The next chunk looks these tables up again by name with get(). | |
|   assign(pname, data) | |
| } | |
| unlist(plist) | |
| ``` | |
| ```{r include=FALSE} | |
| # Stack every stored player table into one data frame, tagging each row with the name of the variable it came from. | |
| # All tables are bound in a single rbind() call rather than growing `data` one iteration at a time; the helper works on a local copy, so no other top-level object is overwritten. | |
| data <- do.call(rbind, lapply(plist, function(p) { | |
|   player_df <- get(p) | |
|   player_df$player <- p | |
|   player_df | |
| })) | |
| ``` | |
| ```{r echo=FALSE} | |
| # Number of rows in the combined table for each value of player. | |
| table(data$player) | |
| ``` | |
| ```{r include=FALSE} | |
| library(tidyverse) | |
| # Read the Nets 2021 schedule page; its first table is kept as the regular season and its second as the playoffs. | |
| nets_games_url <- 'https://www.basketball-reference.com/teams/BRK/2021_games.html' | |
| html <- read_html(nets_games_url) | |
| tables <- html_table(html_nodes(html, "table")) | |
| nets_regular_season <- data.frame(tables[[1]]) | |
| nets_playoffs <- data.frame(tables[[2]]) | |
| # Keep the playoff columns used later and rename them in the same step. | |
| games <- nets_playoffs %>% | |
|   select(G, Date, Home = Var.6, Opponent, WinLoss = Var.8, Team = Tm, Opp) | |
| ``` | |
| ```{r include=FALSE} | |
| # Take characters 6-20 of Date and parse them as "Month day, year". | |
| # NOTE(review): this presumably skips a five-character weekday prefix such as "Sat, "; %B parsing depends on the session locale, so confirm both. | |
| # substr() is vectorised, so the column is handled directly instead of coercing the whole data frame to a matrix row by row. | |
| games$date <- as.Date(substr(games$Date, 6, 20), format = "%B %d, %Y") | |
| #glimpse(games) | |
| # as.integer() turns any entry that is not a number into NA. | |
| games$Team <- as.integer(games$Team) | |
| games$Opp <- as.integer(games$Opp) | |
| # Score margin: Team minus Opp. | |
| games$diff <- games$Team - games$Opp | |
| ``` | |
| ```{r warning=FALSE, include=FALSE} | |
| # Label each game: "@" in the Home column means AWAY, anything else HOME (NA stays NA). | |
| games$home <- ifelse(games$Home == "@", "AWAY", "HOME") | |
| table(games$home) | |
| ``` | |
| ```{r include=FALSE} | |
| # Parse the player-log dates so they can be matched against the schedule dates. | |
| data$date <- as.Date(data$Date, format = "%Y-%m-%d") | |
| # Left join: every schedule row is kept and matched to player rows on date. | |
| dfm <- left_join(games, data, by = "date") | |
| # Columns needed for plotting, plus minutes parsed from the "MM:SS" text in MP. | |
| mp <- dfm %>% | |
|   select(date, MP, player, WinLoss, diff, home, Opponent, G.x) %>% | |
|   mutate(min = as.difftime(MP, format = "%M:%S", units = "mins")) | |
| ``` | |
| ```{r echo=FALSE, message=FALSE, warning=FALSE} | |
| library(cowplot) | |
| library(ggrepel) | |
| # Replace the player ids with surnames for plotting; any id not listed here becomes NA and is filtered out of the plots below. | |
| # "johnsty01" is Tyler Johnson, so the label is "Johnson". | |
| mp <- mp %>% mutate(player = case_when( | |
|   player == "brownbr01" ~ "Brown", | |
|   player == "claxtni01" ~ "Claxton", | |
|   player == "duranke01" ~ "Durant", | |
|   player == "greenje02" ~ "Green", | |
|   player == "griffbl01" ~ "Griffin", | |
|   player == "hardeja01" ~ "Harden", | |
|   player == "harrijo01" ~ "Harris", | |
|   player == "irvinky01" ~ "Irving", | |
|   player == "jamesmi02" ~ "James", | |
|   player == "johnsty01" ~ "Johnson", | |
|   player == "shamela01" ~ "Shamet" | |
| )) | |
| # Players treated as the reduced-minutes group; they are plotted separately from the rest below. | |
| reduced_mins <- c("Johnson", "Claxton", "James", "Shamet", "Green") | |
| # Minutes by date for every labelled player, one panel per game result. | |
| mp %>% | |
|   filter(!is.na(player)) %>% | |
|   ggplot(.) + | |
|   geom_point(aes(x = date, y = min, colour = player)) + | |
|   facet_grid(WinLoss ~ .) + | |
|   theme_cowplot() | |
| # Minutes by date, one panel for home games and one for away games. | |
| mp %>% | |
|   filter(!is.na(player)) %>% | |
|   ggplot(.) + | |
|   geom_line(aes(x = date, y = min, colour = player)) + | |
|   facet_grid(home ~ .) + | |
|   theme_cowplot() | |
| # Minutes by date for players outside the reduced-minutes group. | |
| mp %>% | |
|   filter(!is.na(player) & !(player %in% reduced_mins)) %>% | |
|   ggplot(.) + | |
|   geom_line(aes(x = date, y = min, colour = player)) + | |
|   theme_cowplot() + | |
|   labs(y = "minutes played") | |
| # Minutes by date for the reduced-minutes group. | |
| mp %>% | |
|   filter(!is.na(player) & player %in% reduced_mins) %>% | |
|   ggplot(.) + | |
|   geom_line(aes(x = date, y = min, colour = player)) + | |
|   theme_cowplot() + | |
|   labs(y = "minutes played") | |
| # gm numbers each player's rows in date order within an opponent; ungroup() afterwards so the grouping does not leak into later steps. | |
| mp <- mp %>% | |
|   group_by(Opponent, player) %>% | |
|   arrange(date) %>% | |
|   mutate(gm = row_number()) %>% | |
|   ungroup() | |
| # Reduced-minutes group by game number, one panel per opponent, labelled at games 1 and 5. | |
| # The x limits are passed as a factor, matching the other game-number plots in this chunk. | |
| p1 <- mp %>% | |
|   filter(!is.na(player) & player %in% reduced_mins) %>% | |
|   ggplot(.) + | |
|   geom_line(aes(x = gm, y = min, colour = player)) + | |
|   theme_cowplot() + | |
|   labs(y = "minutes played") + | |
|   facet_grid(Opponent ~ .) + | |
|   scale_x_discrete(limits = factor(seq(1, 7))) + | |
|   labs(x = "game") + | |
|   theme(legend.position = "none") + | |
|   geom_text_repel(aes(x = gm, y = min, label = ifelse(gm %in% c(1, 5), player, ""))) | |
| p1 | |
| # Stacked bars per game. NOTE(review): min / 5 presumably rescales the stack of five on-court players to game length; confirm, since the axis is still labelled "minutes played". | |
| # p1 is reused here, replacing the line plot printed above. | |
| p1 <- mp %>% | |
|   filter(!is.na(player) & !(player %in% reduced_mins)) %>% | |
|   ggplot(.) + | |
|   geom_col(aes(x = gm, y = (min / 5), colour = player, fill = player)) + | |
|   theme_cowplot() + | |
|   labs(y = "minutes played", colour = "Player", fill = "Player") + | |
|   facet_grid(Opponent ~ .) + | |
|   scale_x_discrete(limits = factor(seq(1, 7))) | |
| p2 <- mp %>% | |
|   filter(!is.na(player) & (player %in% reduced_mins)) %>% | |
|   ggplot(.) + | |
|   geom_col(aes(x = gm, y = (min / 5), colour = player, fill = player)) + | |
|   theme_cowplot() + | |
|   labs(y = "minutes played", colour = "Player", fill = "Player") + | |
|   facet_grid(Opponent ~ .) + | |
|   scale_x_discrete(limits = factor(seq(1, 7))) | |
| # Players outside the reduced-minutes group by game number, labelled at game 1. | |
| mp %>% | |
|   filter(!is.na(player) & !(player %in% reduced_mins)) %>% | |
|   ggplot(.) + | |
|   geom_line(aes(x = gm, y = min, colour = player)) + | |
|   theme_cowplot() + | |
|   labs(y = "minutes played") + | |
|   facet_grid(Opponent ~ .) + | |
|   scale_x_discrete(limits = factor(seq(1, 7))) + | |
|   theme(legend.position = "none") + | |
|   geom_text_repel(aes(x = gm, y = min, label = ifelse(gm == 1, player, ""))) | |
| p1 | |
| p2 | |
| #plot_grid(p1, p2, nrow=2, ncol=1) | |
| ``` | |
| References: | |
| https://stackoverflow.com/questions/40616357/how-to-scrape-tables-inside-a-comment-tag-in-html-with-r | |
| https://stackoverflow.com/questions/49057868/extracting-text-within-tags-inside-html-comments-with-beautifulsoup |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment