Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active March 2, 2025 16:35
Show Gist options
  • Save USMortality/21a0c9d58e280d7966307bd51f57468c to your computer and use it in GitHub Desktop.
Save USMortality/21a0c9d58e280d7966307bd51f57468c to your computer and use it in GitHub Desktop.
2. Bundesliga Standings
library(tidyverse)
# Load and preprocess the dataset
df <- read_csv("https://www.football-data.co.uk/mmz4281/2425/D2.csv") |>
mutate(Date = as.Date(Date, format = "%d/%m/%Y")) |>
select(Date, HomeTeam, AwayTeam, FTR) |>
arrange(Date)
# Ensure unique set of teams per matchday
matchday_counter <- 1
teams_played <- c()
df2 <- df |>
rowwise() |>
mutate(
Matchday = {
if (HomeTeam %in% teams_played | AwayTeam %in% teams_played) {
matchday_counter <<- matchday_counter + 1
teams_played <<- c()
}
teams_played <<- c(teams_played, HomeTeam, AwayTeam)
matchday_counter
}
) |>
ungroup() |>
select(Matchday, HomeTeam, AwayTeam, FTR)
second_half <- df2 |>
filter(Matchday %in% 1:17) |>
mutate(
Matchday = Matchday + 17,
HomeTeam2 = HomeTeam,
HomeTeam = AwayTeam,
AwayTeam = HomeTeam2,
FTR = ifelse( # Flip home/away win; draws remain "D")
FTR == "H", "A", ifelse(FTR == "A", "H", FTR)
)
) |>
select(-HomeTeam2)
generate_league_table <- function(team_name = NULL, x_chance = 0.5) {
# Identify missing matches
unplayed_matches <- anti_join(second_half, df2, by = c("Matchday", "HomeTeam", "AwayTeam"))
# Version 1: Keep FTR as is (Original)
unplayed_matches1 <- unplayed_matches
# Version 2: Assign FTR so team_name always wins
unplayed_matches2 <- unplayed_matches %>%
mutate(
FTR = ifelse(HomeTeam == team_name, "H",
ifelse(AwayTeam == team_name, "A", FTR)
)
)
# Version 3: Forced Losses for `team_name`
unplayed_matches3 <- unplayed_matches %>%
mutate(
FTR = ifelse(HomeTeam == team_name, "A",
ifelse(AwayTeam == team_name, "H", FTR)
) # Reverse logic for losses
)
# Version 4: Override with x% chance
set.seed(42) # Ensure reproducibility
unplayed_matches4 <- unplayed_matches %>%
mutate(
override = runif(n()) < x_chance, # Generate TRUE/FALSE based on x_chance probability
FTR = ifelse(override, ifelse(HomeTeam == team_name, "H", "A"), FTR) # Change only with probability
) %>%
select(-override) # Remove temporary column
# Generate df3 for all four versions
df3_v1 <- bind_rows(df2, unplayed_matches1) %>%
arrange(Matchday, HomeTeam, AwayTeam)
df3_v2 <- bind_rows(df2, unplayed_matches2) %>%
arrange(Matchday, HomeTeam, AwayTeam)
df3_v3 <- bind_rows(df2, unplayed_matches3) %>%
arrange(Matchday, HomeTeam, AwayTeam)
df3_v4 <- bind_rows(df2, unplayed_matches4) %>%
arrange(Matchday, HomeTeam, AwayTeam)
# Function to calculate final table
calculate_table <- function(df) {
df %>%
mutate(
HomePoints = case_when(FTR == "H" ~ 3, FTR == "D" ~ 1, TRUE ~ 0),
AwayPoints = case_when(FTR == "A" ~ 3, FTR == "D" ~ 1, TRUE ~ 0)
) %>%
pivot_longer(cols = c(HomeTeam, AwayTeam), names_to = "Location", values_to = "Team") %>%
mutate(
Points = ifelse(Location == "HomeTeam", HomePoints, AwayPoints),
Wins = (FTR == "H" & Location == "HomeTeam") | (FTR == "A" & Location == "AwayTeam"),
Draws = FTR == "D",
Losses = (FTR == "A" & Location == "HomeTeam") | (FTR == "H" & Location == "AwayTeam")
) %>%
group_by(Team) %>%
summarise(
GP = n(),
W = sum(Wins),
D = sum(Draws),
L = sum(Losses),
Pts = sum(Points),
.groups = "drop"
) %>%
arrange(desc(Pts), desc(W), desc(D))
}
# Calculate final tables
final_table_v1 <- calculate_table(df3_v1)
final_table_v2 <- calculate_table(df3_v2)
final_table_v3 <- calculate_table(df3_v3)
final_table_v4 <- calculate_table(df3_v4)
return(list(
"Version 1 (Original)" = final_table_v1,
"Version 2 (Forced Wins)" = final_table_v2,
"Version 3 (Forced Losses)" = final_table_v3,
"Version 4 (x% Chance Override)" = final_table_v4
))
}
# Example usage (Karlsruhe with 18% override chance)
league_tables <- generate_league_table("Karlsruhe", x_chance = 0.18)
# Print all four versions
print(league_tables$`Version 1 (Original)`) # Regular version
print(league_tables$`Version 2 (Forced Wins)`) # Forced wins for Karlsruhe
print(league_tables$`Version 3 (Forced Losses)`) # Forced losses for Karlsruhe
print(league_tables$`Version 4 (x% Chance Override)`) # Random override with 18% probability
library(rvest)
library(dplyr)
library(stringr)
library(ggimage)
library(purrr)
url <- "https://www.kicker.de/2-bundesliga/tabelle"
page <- read_html(url)
width <- 480 * 2
height <- 480 * 2
options(vsc.dev.args = list(width = width, height = height, res = 72 * 2))
table <- page |> html_table(fill = TRUE)
table <- table[[1]]
logos <- page |>
html_nodes(".kick__table tbody tr") |>
map_df(~ {
image_link <- .x |>
html_node("td:nth-child(3) a picture img") |>
html_attr("src") |>
str_remove(paste0(
"https://derivates.kicker.de/image/fetch/",
"w_30%2Ch_30%2Cc_fit%2Cq_auto:best/"
))
tibble(logo = image_link) |>
filter(!is.na(logo))
})
df <- table |>
select(Team, Punkte) |>
mutate(
Punkte = as.integer(Punkte), # Convert to integer
Team = sapply(strsplit(Team, "\r\n"), `[`, 1)
) |>
mutate(
Team = factor(Team, levels = rev(Team)), # Reverse levels so highest is at top
rank = row_number() # Add rank for separation lines
)
df$logo <- logos$logo
df$abstand_aufstieg <- df$Punkte - df$Punkte[3]
df$abstand_abstieg <- df$Punkte - df$Punkte[16]
df$text <- paste0(
"P: ", df$Punkte, " (^", df$abstand_aufstieg, ", v", df$abstand_abstieg, ")"
)
n_teams <- nrow(df)
top_line <- n_teams - 3 + 0.5
bottom_line <- 3.5
x_min <- min(df$Punkte, na.rm = TRUE) - 3
x_max <- max(df$Punkte, na.rm = TRUE) + 6
ggplot(df, aes(x = Punkte, y = rank)) +
geom_segment(aes(x = x_min, xend = Punkte, y = rank, yend = rank),
color = "gray", size = 1
) +
geom_image(aes(image = logo), size = 0.05) +
geom_text(aes(label = text, x = Punkte), hjust = -0.3, size = 3.5) +
geom_hline(yintercept = top_line, linetype = "dashed", color = "red") +
geom_hline(yintercept = bottom_line, linetype = "dashed", color = "red") +
scale_x_continuous(limits = c(x_min, x_max)) +
scale_y_reverse(breaks = 1:18, labels = df$rank) +
labs(
title = "2. Bundesliga - Tabelle",
subtitle = paste0(
"Quelle: Kicker.de · Spieltag 24\n",
"P = Punkte, ^ = Abstand Aufstiegsplatz, v = Abstand Abstiegsplatz"
),
x = "Punkte", y = "Platz"
) +
theme_minimal() +
theme(
axis.text.y = element_text(color = "black"),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank()
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment