Last active
August 7, 2017 13:56
-
-
Save JoGall/8fa2fef985d916aeef09aa6a98bcb72c to your computer and use it in GitHub Desktop.
On 'Hoodability' and conceding set piece goals
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Data on set piece goals scored and conceded in 2016-17 from [here](https://objective-football.blogspot.co.uk/2016/08/201617-open-playset-piecepenalty-data.html). | |
| --- | |
| # Get data | |
| ```{r} | |
| library(dplyr) | |
| library(XML) | |
| library(RCurl) | |
| library(engsoccerdata) | |
| library(ggplot2) | |
| library(ggrepel) | |
| #set ggplot theme | |
| my_theme <- theme_bw() + | |
| theme( | |
| axis.line = element_line(colour = "black"), | |
| axis.title = element_text(size=14, colour = "black", family='Roboto Condensed Light'), | |
| axis.text = element_text(size=14, colour = "black", family='Roboto Condensed Light'), | |
| legend.position = "none" | |
| ) | |
| # 1) Get set piece goals data | |
| # scrape table from url | |
| tmp <- "https://objective-football.blogspot.co.uk/2016/08/201617-open-playset-piecepenalty-data.html" %>% | |
| getURL() %>% | |
| readHTMLTable(header = TRUE) | |
| tmp <- tmp[[1]] | |
| # reformat | |
| setpieces <- data.frame(team = tmp[,2], GF = as.numeric(as.character(tmp[,7])) + as.numeric(as.character(tmp[,18])) + as.numeric(as.character(tmp[,25])), GA = as.numeric(as.character(tmp[,8])) + as.numeric(as.character(tmp[,19])) + as.numeric(as.character(tmp[,26])), SPGF = tmp[,18], SPGA = tmp[,19])[-21,] | |
| # 2) Get final league positions for 2016-17 season from engsoccerdata | |
| # update 'england' dataframe if there are new results | |
| england <- rbind(england, subset(england_current(), tier == 1 & !(Date %in% england$Date & home %in% england$home))) | |
| # calculate final league positions | |
| pos <- maketable(england, Season = 2016, tier = 1) %>% | |
| arrange(team) | |
| # 3) Merge dataframes | |
| # join based on alphabetical order of teams (order of AFC Bournemouth & Arsenal require switching) | |
| d <- data.frame(team = pos$team, pos = pos$Pos, setpieces[c(2,1,3:20),-1]) | |
| # convert factors to numeric | |
| d <- data.frame(team = pos$team, apply(d[,2:6], 2, function(x) as.numeric(as.character(x))) ) | |
| ``` | |
| Plot %age total goals SCORED from set pieces vs. final league position | |
| ```{r, dpi=300} | |
| ggplot(d, aes(x = pos, y = SPGF / GF * 100)) + | |
| geom_point(size = 2) + | |
| geom_smooth(method='lm', col="black", alpha=0.25) + | |
| scale_x_continuous(limits = c(1,20), breaks = c(1, 5, 10, 15, 20)) + | |
| ylab("% total goals scored from set pieces") + | |
| xlab("Final league position") + | |
| geom_label_repel( | |
| aes(label = team), | |
| fontface = 'bold', | |
| box.padding = unit(0.35, "lines"), | |
| point.padding = unit(0.5, "lines"), | |
| segment.color = 'grey50' | |
| ) + | |
| my_theme | |
| ``` | |
| Plot %age total goals CONCEDED from set pieces vs. final league position | |
| ```{r, dpi=300} | |
| ggplot(d, aes(x = pos, y = SPGA / GA * 100)) + | |
| geom_point(size = 2) + | |
| geom_smooth(method='lm', col="black", alpha=0.25) + | |
| scale_x_continuous(limits = c(1,20), breaks = c(1, 5, 10, 15, 20)) + | |
| ylab("% total goals conceded from set pieces") + | |
| xlab("Final league position") + | |
| geom_label_repel( | |
| aes(label = team), | |
| fontface = 'bold', | |
| box.padding = unit(0.35, "lines"), | |
| point.padding = unit(0.5, "lines"), | |
| segment.color = 'grey50' | |
| ) + | |
| my_theme | |
| ``` | |
| Plot %age total goals CONCEDED from set pieces vs. 'Hoodability' (defined [here](https://jogall.github.io/2017-08-04-robin-hood-teams/)) | |
| ```{r} | |
| # calculate hoodability | |
| # Update 'england' dataframe if there are new results | |
| england <- rbind(england, subset(england_current(), tier == 1 & !(Date %in% england$Date & home %in% england$home))) | |
| #make league table | |
| x_table <- maketable(england, Season = 2016, tier = 1) %>% | |
| mutate(Pos = as.numeric(Pos)) %>% | |
| dplyr::select(team, Pos) | |
| #get all teams plus top 6 and bottom 6 | |
| all_teams <- x_table$team | |
| top_teams <- subset(x_table, Pos %in% 1:6)$team | |
| bottom_teams <- subset(x_table, Pos %in% 15:20)$team | |
| #for each team in league that season | |
| hoodability <- lapply(all_teams, function(y) { | |
| #league position | |
| pos <- subset(x_table, team == y)$Pos | |
| #ppg against top 6 | |
| top <- subset(homeaway(subset(england, tier == 1 & Season == 2016)), team == y & opp %in% top_teams) %>% | |
| mutate(Pts = ifelse(gf > ga, 3, ifelse(gf < ga, 0, 1))) %>% | |
| summarise(Season = Season[1], team = team[1], top_pts = sum(Pts), top_ppg = mean(Pts)) | |
| #ppg against bottom 6 | |
| bottom <- subset(homeaway(subset(england, tier == 1 & Season == 2016)), team == y & opp %in% bottom_teams) %>% | |
| mutate(Pts = ifelse(gf > ga, 3, ifelse(gf < ga, 0, 1))) %>% | |
| summarise(team = team[1], bottom_pts = sum(Pts), bottom_ppg = mean(Pts)) | |
| #hoodability as difference | |
| hoodability <- merge(top, bottom, by = "team") %>% | |
| mutate(hoodability = top_ppg - bottom_ppg) %>% | |
| mutate(pos) | |
| } ) %>% | |
| plyr::rbind.fill() %>% | |
| arrange(desc(hoodability)) | |
| d2 <- merge(d, hoodability, on.x = "team", on.y="team") | |
| ``` | |
| ```{r, dpi=300} | |
| ggplot(d2, aes(x = hoodability, y = SPGA / GA * 100)) + | |
| geom_point(size = 2) + | |
| geom_smooth(method='lm', col="black", alpha=0.25) + | |
| ylab("% total goals conceded from set pieces") + | |
| xlab("Hoodability") + | |
| scale_x_continuous(limits = c(-2.1, 0)) + | |
| geom_label_repel( | |
| aes(label = team), | |
| fontface = 'bold', | |
| box.padding = unit(0.35, "lines"), | |
| point.padding = unit(0.5, "lines"), | |
| segment.color = 'grey50' | |
| ) + | |
| my_theme | |
| ``` |
Author
JoGall
commented
Aug 7, 2017

Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment