Last active
March 2, 2025 16:35
-
-
Save USMortality/21a0c9d58e280d7966307bd51f57468c to your computer and use it in GitHub Desktop.
2. Bundesliga Standings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
# Load and preprocess the dataset | |
df <- read_csv("https://www.football-data.co.uk/mmz4281/2425/D2.csv") |> | |
mutate(Date = as.Date(Date, format = "%d/%m/%Y")) |> | |
select(Date, HomeTeam, AwayTeam, FTR) |> | |
arrange(Date) | |
# Ensure unique set of teams per matchday | |
matchday_counter <- 1 | |
teams_played <- c() | |
df2 <- df |> | |
rowwise() |> | |
mutate( | |
Matchday = { | |
if (HomeTeam %in% teams_played | AwayTeam %in% teams_played) { | |
matchday_counter <<- matchday_counter + 1 | |
teams_played <<- c() | |
} | |
teams_played <<- c(teams_played, HomeTeam, AwayTeam) | |
matchday_counter | |
} | |
) |> | |
ungroup() |> | |
select(Matchday, HomeTeam, AwayTeam, FTR) | |
second_half <- df2 |> | |
filter(Matchday %in% 1:17) |> | |
mutate( | |
Matchday = Matchday + 17, | |
HomeTeam2 = HomeTeam, | |
HomeTeam = AwayTeam, | |
AwayTeam = HomeTeam2, | |
FTR = ifelse( # Flip home/away win; draws remain "D") | |
FTR == "H", "A", ifelse(FTR == "A", "H", FTR) | |
) | |
) |> | |
select(-HomeTeam2) | |
generate_league_table <- function(team_name = NULL, x_chance = 0.5) { | |
# Identify missing matches | |
unplayed_matches <- anti_join(second_half, df2, by = c("Matchday", "HomeTeam", "AwayTeam")) | |
# Version 1: Keep FTR as is (Original) | |
unplayed_matches1 <- unplayed_matches | |
# Version 2: Assign FTR so team_name always wins | |
unplayed_matches2 <- unplayed_matches %>% | |
mutate( | |
FTR = ifelse(HomeTeam == team_name, "H", | |
ifelse(AwayTeam == team_name, "A", FTR) | |
) | |
) | |
# Version 3: Forced Losses for `team_name` | |
unplayed_matches3 <- unplayed_matches %>% | |
mutate( | |
FTR = ifelse(HomeTeam == team_name, "A", | |
ifelse(AwayTeam == team_name, "H", FTR) | |
) # Reverse logic for losses | |
) | |
# Version 4: Override with x% chance | |
set.seed(42) # Ensure reproducibility | |
unplayed_matches4 <- unplayed_matches %>% | |
mutate( | |
override = runif(n()) < x_chance, # Generate TRUE/FALSE based on x_chance probability | |
FTR = ifelse(override, ifelse(HomeTeam == team_name, "H", "A"), FTR) # Change only with probability | |
) %>% | |
select(-override) # Remove temporary column | |
# Generate df3 for all four versions | |
df3_v1 <- bind_rows(df2, unplayed_matches1) %>% | |
arrange(Matchday, HomeTeam, AwayTeam) | |
df3_v2 <- bind_rows(df2, unplayed_matches2) %>% | |
arrange(Matchday, HomeTeam, AwayTeam) | |
df3_v3 <- bind_rows(df2, unplayed_matches3) %>% | |
arrange(Matchday, HomeTeam, AwayTeam) | |
df3_v4 <- bind_rows(df2, unplayed_matches4) %>% | |
arrange(Matchday, HomeTeam, AwayTeam) | |
# Function to calculate final table | |
calculate_table <- function(df) { | |
df %>% | |
mutate( | |
HomePoints = case_when(FTR == "H" ~ 3, FTR == "D" ~ 1, TRUE ~ 0), | |
AwayPoints = case_when(FTR == "A" ~ 3, FTR == "D" ~ 1, TRUE ~ 0) | |
) %>% | |
pivot_longer(cols = c(HomeTeam, AwayTeam), names_to = "Location", values_to = "Team") %>% | |
mutate( | |
Points = ifelse(Location == "HomeTeam", HomePoints, AwayPoints), | |
Wins = (FTR == "H" & Location == "HomeTeam") | (FTR == "A" & Location == "AwayTeam"), | |
Draws = FTR == "D", | |
Losses = (FTR == "A" & Location == "HomeTeam") | (FTR == "H" & Location == "AwayTeam") | |
) %>% | |
group_by(Team) %>% | |
summarise( | |
GP = n(), | |
W = sum(Wins), | |
D = sum(Draws), | |
L = sum(Losses), | |
Pts = sum(Points), | |
.groups = "drop" | |
) %>% | |
arrange(desc(Pts), desc(W), desc(D)) | |
} | |
# Calculate final tables | |
final_table_v1 <- calculate_table(df3_v1) | |
final_table_v2 <- calculate_table(df3_v2) | |
final_table_v3 <- calculate_table(df3_v3) | |
final_table_v4 <- calculate_table(df3_v4) | |
return(list( | |
"Version 1 (Original)" = final_table_v1, | |
"Version 2 (Forced Wins)" = final_table_v2, | |
"Version 3 (Forced Losses)" = final_table_v3, | |
"Version 4 (x% Chance Override)" = final_table_v4 | |
)) | |
} | |
# Example usage (Karlsruhe with 18% override chance) | |
league_tables <- generate_league_table("Karlsruhe", x_chance = 0.18) | |
# Print all four versions | |
print(league_tables$`Version 1 (Original)`) # Regular version | |
print(league_tables$`Version 2 (Forced Wins)`) # Forced wins for Karlsruhe | |
print(league_tables$`Version 3 (Forced Losses)`) # Forced losses for Karlsruhe | |
print(league_tables$`Version 4 (x% Chance Override)`) # Random override with 18% probability |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest) | |
library(dplyr) | |
library(stringr) | |
library(ggimage) | |
library(purrr) | |
url <- "https://www.kicker.de/2-bundesliga/tabelle" | |
page <- read_html(url) | |
width <- 480 * 2 | |
height <- 480 * 2 | |
options(vsc.dev.args = list(width = width, height = height, res = 72 * 2)) | |
table <- page |> html_table(fill = TRUE) | |
table <- table[[1]] | |
logos <- page |> | |
html_nodes(".kick__table tbody tr") |> | |
map_df(~ { | |
image_link <- .x |> | |
html_node("td:nth-child(3) a picture img") |> | |
html_attr("src") |> | |
str_remove(paste0( | |
"https://derivates.kicker.de/image/fetch/", | |
"w_30%2Ch_30%2Cc_fit%2Cq_auto:best/" | |
)) | |
tibble(logo = image_link) |> | |
filter(!is.na(logo)) | |
}) | |
df <- table |> | |
select(Team, Punkte) |> | |
mutate( | |
Punkte = as.integer(Punkte), # Convert to integer | |
Team = sapply(strsplit(Team, "\r\n"), `[`, 1) | |
) |> | |
mutate( | |
Team = factor(Team, levels = rev(Team)), # Reverse levels so highest is at top | |
rank = row_number() # Add rank for separation lines | |
) | |
df$logo <- logos$logo | |
df$abstand_aufstieg <- df$Punkte - df$Punkte[3] | |
df$abstand_abstieg <- df$Punkte - df$Punkte[16] | |
df$text <- paste0( | |
"P: ", df$Punkte, " (^", df$abstand_aufstieg, ", v", df$abstand_abstieg, ")" | |
) | |
n_teams <- nrow(df) | |
top_line <- n_teams - 3 + 0.5 | |
bottom_line <- 3.5 | |
x_min <- min(df$Punkte, na.rm = TRUE) - 3 | |
x_max <- max(df$Punkte, na.rm = TRUE) + 6 | |
ggplot(df, aes(x = Punkte, y = rank)) + | |
geom_segment(aes(x = x_min, xend = Punkte, y = rank, yend = rank), | |
color = "gray", size = 1 | |
) + | |
geom_image(aes(image = logo), size = 0.05) + | |
geom_text(aes(label = text, x = Punkte), hjust = -0.3, size = 3.5) + | |
geom_hline(yintercept = top_line, linetype = "dashed", color = "red") + | |
geom_hline(yintercept = bottom_line, linetype = "dashed", color = "red") + | |
scale_x_continuous(limits = c(x_min, x_max)) + | |
scale_y_reverse(breaks = 1:18, labels = df$rank) + | |
labs( | |
title = "2. Bundesliga - Tabelle", | |
subtitle = paste0( | |
"Quelle: Kicker.de · Spieltag 24\n", | |
"P = Punkte, ^ = Abstand Aufstiegsplatz, v = Abstand Abstiegsplatz" | |
), | |
x = "Punkte", y = "Platz" | |
) + | |
theme_minimal() + | |
theme( | |
axis.text.y = element_text(color = "black"), | |
panel.grid.major.y = element_blank(), | |
panel.grid.minor.y = element_blank() | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment