Skip to content

Instantly share code, notes, and snippets.

@JoGall
Last active August 7, 2017 13:56
Show Gist options
  • Select an option

  • Save JoGall/8fa2fef985d916aeef09aa6a98bcb72c to your computer and use it in GitHub Desktop.

Select an option

Save JoGall/8fa2fef985d916aeef09aa6a98bcb72c to your computer and use it in GitHub Desktop.
On 'Hoodability' and conceding set piece goals
Data on set piece goals scored and conceded in 2016-17 from [here](https://objective-football.blogspot.co.uk/2016/08/201617-open-playset-piecepenalty-data.html).
---
# Get data
```{r}
library(dplyr)
library(XML)
library(RCurl)
library(engsoccerdata)
library(ggplot2)
library(ggrepel)
#set ggplot theme
my_theme <- theme_bw() +
theme(
axis.line = element_line(colour = "black"),
axis.title = element_text(size=14, colour = "black", family='Roboto Condensed Light'),
axis.text = element_text(size=14, colour = "black", family='Roboto Condensed Light'),
legend.position = "none"
)
# 1) Get set piece goals data
# scrape table from url
tmp <- "https://objective-football.blogspot.co.uk/2016/08/201617-open-playset-piecepenalty-data.html" %>%
getURL() %>%
readHTMLTable(header = TRUE)
tmp <- tmp[[1]]
# reformat
setpieces <- data.frame(team = tmp[,2], GF = as.numeric(as.character(tmp[,7])) + as.numeric(as.character(tmp[,18])) + as.numeric(as.character(tmp[,25])), GA = as.numeric(as.character(tmp[,8])) + as.numeric(as.character(tmp[,19])) + as.numeric(as.character(tmp[,26])), SPGF = tmp[,18], SPGA = tmp[,19])[-21,]
# 2) Get final league positions for 2016-17 season from engsoccerdata
# update 'england' dataframe if there are new results
england <- rbind(england, subset(england_current(), tier == 1 & !(Date %in% england$Date & home %in% england$home)))
# calculate final league positions
pos <- maketable(england, Season = 2016, tier = 1) %>%
arrange(team)
# 3) Merge dataframes
# join based on alphabetical order of teams (order of AFC Bournemouth & Arsenal require switching)
d <- data.frame(team = pos$team, pos = pos$Pos, setpieces[c(2,1,3:20),-1])
# convert factors to numeric
d <- data.frame(team = pos$team, apply(d[,2:6], 2, function(x) as.numeric(as.character(x))) )
```
Plot %age total goals SCORED from set pieces vs. final league position
```{r, dpi=300}
ggplot(d, aes(x = pos, y = SPGF / GF * 100)) +
geom_point(size = 2) +
geom_smooth(method='lm', col="black", alpha=0.25) +
scale_x_continuous(limits = c(1,20), breaks = c(1, 5, 10, 15, 20)) +
ylab("% total goals scored from set pieces") +
xlab("Final league position") +
geom_label_repel(
aes(label = team),
fontface = 'bold',
box.padding = unit(0.35, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = 'grey50'
) +
my_theme
```
Plot %age total goals CONCEDED from set pieces vs. final league position
```{r, dpi=300}
ggplot(d, aes(x = pos, y = SPGA / GA * 100)) +
geom_point(size = 2) +
geom_smooth(method='lm', col="black", alpha=0.25) +
scale_x_continuous(limits = c(1,20), breaks = c(1, 5, 10, 15, 20)) +
ylab("% total goals conceded from set pieces") +
xlab("Final league position") +
geom_label_repel(
aes(label = team),
fontface = 'bold',
box.padding = unit(0.35, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = 'grey50'
) +
my_theme
```
Plot %age total goals CONCEDED from set pieces vs. 'Hoodability' (defined [here](https://jogall.github.io/2017-08-04-robin-hood-teams/))
```{r}
# calculate hoodability
# Update 'england' dataframe if there are new results
england <- rbind(england, subset(england_current(), tier == 1 & !(Date %in% england$Date & home %in% england$home)))
#make league table
x_table <- maketable(england, Season = 2016, tier = 1) %>%
mutate(Pos = as.numeric(Pos)) %>%
dplyr::select(team, Pos)
#get all teams plus top 6 and bottom 6
all_teams <- x_table$team
top_teams <- subset(x_table, Pos %in% 1:6)$team
bottom_teams <- subset(x_table, Pos %in% 15:20)$team
#for each team in league that season
hoodability <- lapply(all_teams, function(y) {
#league position
pos <- subset(x_table, team == y)$Pos
#ppg against top 6
top <- subset(homeaway(subset(england, tier == 1 & Season == 2016)), team == y & opp %in% top_teams) %>%
mutate(Pts = ifelse(gf > ga, 3, ifelse(gf < ga, 0, 1))) %>%
summarise(Season = Season[1], team = team[1], top_pts = sum(Pts), top_ppg = mean(Pts))
#ppg against bottom 6
bottom <- subset(homeaway(subset(england, tier == 1 & Season == 2016)), team == y & opp %in% bottom_teams) %>%
mutate(Pts = ifelse(gf > ga, 3, ifelse(gf < ga, 0, 1))) %>%
summarise(team = team[1], bottom_pts = sum(Pts), bottom_ppg = mean(Pts))
#hoodability as difference
hoodability <- merge(top, bottom, by = "team") %>%
mutate(hoodability = top_ppg - bottom_ppg) %>%
mutate(pos)
} ) %>%
plyr::rbind.fill() %>%
arrange(desc(hoodability))
d2 <- merge(d, hoodability, on.x = "team", on.y="team")
```
```{r, dpi=300}
ggplot(d2, aes(x = hoodability, y = SPGA / GA * 100)) +
geom_point(size = 2) +
geom_smooth(method='lm', col="black", alpha=0.25) +
ylab("% total goals conceded from set pieces") +
xlab("Hoodability") +
scale_x_continuous(limits = c(-2.1, 0)) +
geom_label_repel(
aes(label = team),
fontface = 'bold',
box.padding = unit(0.35, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = 'grey50'
) +
my_theme
```
@JoGall
Copy link
Copy Markdown
Author

JoGall commented Aug 7, 2017

unnamed-chunk-5-1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment