Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active February 24, 2025 05:21
Show Gist options
  • Save USMortality/5495f4fee01b5f2cc5c5c9d901fc5120 to your computer and use it in GitHub Desktop.
Save USMortality/5495f4fee01b5f2cc5c5c9d901fc5120 to your computer and use it in GitHub Desktop.
Umfragen & Wahlergebnisse [Deutschland]
library(rvest)
library(dplyr)
library(ggplot2)
library(tidyr)
library(scales)
library(purrr)
library(readr)
library(dplyr)
sf <- 2
width <- 600 * sf
height <- 335 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
party_colors <- c(
"CDU/CSU" = "#000000",
"SPD" = "#E3000F",
"GRÜNE" = "#64BC5C",
"FDP" = "#FFCC00",
"DIE LINKE" = "#BE3075",
"AfD" = "#0C1C8C",
"FW" = "#f49800",
"BSW" = "#792350"
)
block_colors <- c(
"Links" = "#E30000",
"Rechts" = "#0000E3",
"Sonstige" = "#444444"
)
# Get historical data
ts1 <- read_csv(paste0(
"https://gist.githubusercontent.com/USMortality/",
"a92fe8baacae09b8df03dd33aeea7d67/raw/data.csv"
))
# Get latet data
dates <- seq.Date(from = as.Date("2024-09-15"), to = Sys.Date(), by = "month")
process_table <- function(tbl) {
names(tbl) <-
ifelse(names(tbl) == "", paste0("Unnamed", seq_along(tbl)), names(tbl))
tbl |>
select(1:(ncol(tbl))) |>
mutate(
across(
-1,
~ as.numeric(gsub(",", ".", gsub(" %", "", .))) / 100
)
)
}
result <- vector("list", length(dates))
for (i in seq_along(dates)) {
date <- dates[i]
print(date)
url <- paste0(
"https://web.archive.org/web/",
format(date, "%Y%m%d"),
"/https://www.wahlrecht.de/umfragen/"
)
# Attempt to read the webpage; if an error occurs, skip to the next iteration
tryCatch(
{
webpage <- read_html(url)
tables <- html_table(webpage, fill = TRUE)
df <- tables[[2]] %>%
process_table() %>%
pivot_longer(
cols = 2:(last_col() - 1),
names_to = "Variable", values_to = "Value"
) %>%
select(1, 3, 4) %>%
setNames(c("party", "institute", "value")) %>%
filter(!is.na(value))
df$date <- as.Date(dates[i])
result[[i]] <- df
},
error = function(e) {
message(sprintf(
"Error processing URL for date %s: %s",
format(dates[i], "%Y-%m-%d"), e$message
))
}
)
Sys.sleep(3)
}
# write.csv(ts, "~/Downloads/out.csv", row.names = FALSE)
ts <- bind_rows(ts1, compact(result)) |>
arrange(date, party) |>
unique()
# 12m span for LOESS
first_date <- min(ts$date)
last_date <- max(ts$date)
span <-
min(365 / as.numeric(difftime(last_date, first_date, units = "days")), 1)
# Create the plot
chart <- ggplot(
ts |> filter(
party %in% names(party_colors),
# date >= as.Date("2017-09-24") - 30
),
aes(x = date, y = value, color = party, fill = party)
) +
geom_smooth(
method = "loess",
span = span,
# span = span * 3,
alpha = 0.2
) +
geom_point(size = 0.1) +
scale_y_continuous(labels = percent_format(scale = 100)) +
scale_color_manual(values = party_colors) +
scale_fill_manual(values = party_colors) +
geom_vline(
xintercept =
as.Date(c("2013-09-22", "2017-09-24", "2021-09-26", "2025-02-23")),
linetype = "dashed", color = "black"
) +
labs(
x = "Datum",
y = "Stimmenanteil",
title = "Umfrageergebnisse [Deutschland]",
subtitle = paste0(
"Gestrichelte Linien: Bundestagswahl · ",
"Quelle: wahlrecht.de · @USMortality"
),
color = "Partei",
fill = "Partei"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "right"
)
ggplot2::ggsave(
filename = "chart1.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
left <- c("DIE LINKE", "GRÜNE", "SPD", "BSW", "PIRATEN")
right <- c("CDU/CSU", "FDP", "AfD", "FW")
ts2 <- ts |>
mutate(block = case_when(
party %in% left ~ "Links",
party %in% right ~ "Rechts",
TRUE ~ "Sonstige"
)) |>
group_by(date, institute, block) |>
summarize(value = sum(value, na.rm = TRUE))
# Create the plot
chart2 <-
ggplot(
ts2,
aes(x = date, y = value, color = block)
) +
geom_smooth(
method = "loess",
span = span,
# span = span * 3,
alpha = 0.2
) +
geom_point(size = 0.1) +
scale_y_continuous(labels = percent_format(scale = 100)) +
scale_color_manual(values = block_colors) +
geom_vline(
xintercept =
as.Date(c("2013-09-22", "2017-09-24", "2021-09-26", "2025-02-23")),
linetype = "dashed", color = "black"
) +
labs(
x = "Datum",
y = "Stimmenanteil",
title = "Umfrageergebnisse Links / Rechts [Deutschland]",
subtitle = paste0(
"Links: ", paste(left, collapse = ", "), " · Rechts: ", paste(right, collapse = ", "),
"\nGestrichelte Linien: Bundestagswahl · ",
"Quelle: wahlrecht.de · @USMortality"
),
color = "Block",
fill = "Block"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top"
)
ggplot2::ggsave(
filename = "chart2.png", plot = chart2, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
library(rvest)
library(tidyverse)
sf <- 2
width <- 600 * sf
height <- 335 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
# Farbzuordnung der Parteien
party_colors <- c(
"CDU/CSU" = "#000000",
"SPD" = "#E3000F",
"Grüne" = "#64BC5C",
"FDP" = "#FFCC00",
"DIE LINKE" = "#BE3075",
"AfD" = "#0C1C8C",
"BSW" = "#792350",
"Sonstige" = "#444444"
)
# Daten laden und vorbereiten
prepare_data_long <- function(url) {
page <- read_html(url)
df <- page |>
html_node("#prognosen-hochrechnungen-tabelle") |>
html_table(fill = TRUE)
colnames(df) <- df[1, ]
df2 <- df |>
select(1:2, 4:12) |>
setNames(c(
"Zeit", "Institut", "SPD", "CDU/CSU", "Grüne", "FDP", "AfD", "DIE LINKE",
"SSW", "BSW", "Sonstige"
))
df3 <- df2[-c(1, 2), ] |>
mutate(Zeit = as.POSIXct(
paste(Sys.Date() + ifelse(as.numeric(str_sub(Zeit, 1, 2)) < 18, 1, 0), Zeit),
format = "%Y-%m-%d %H:%M"
)) |>
mutate(across(3:ncol(df2), ~ as.numeric(
str_replace(str_replace(.x, ",", "."), " %", "")
) / 100)) |>
select(-SSW)
df3 %>%
pivot_longer(cols = -c(Zeit, Institut), names_to = "Partei", values_to = "Stimmanteil")
}
# Funktion für das Plotten
plot_results <- function(data, institut, change_from_first = FALSE) {
if (change_from_first) {
first_reported <- data %>%
filter(Institut == institut) %>%
group_by(Partei) %>%
arrange(Zeit) %>%
slice(1) %>%
select(Partei, first_Stimmanteil = Stimmanteil)
data <- data %>%
filter(Institut == institut) %>%
left_join(first_reported, by = "Partei") %>%
mutate(Stimmanteil = Stimmanteil - first_Stimmanteil)
title <- paste("Änderung seit der ersten Prognose [", institut, "]", sep = "")
y_label <- "Änderung des Stimmanteils"
} else {
data <- data %>% filter(Institut == institut)
title <- paste("Entwicklung der Prognosen & Hochrechnungen [", institut, "]", sep = "")
y_label <- "Stimmanteil"
}
scales_value <- if (change_from_first) "fixed" else "free"
ggplot(
data,
aes(x = Zeit, y = Stimmanteil, color = Partei, group = Partei)
) +
geom_line(size = 1) +
geom_point(size = 2) +
labs(
title = title,
x = "Uhrzeit",
y = y_label,
color = "Partei"
) +
theme_minimal() +
theme(legend.position = "none") +
scale_color_manual(values = party_colors) +
scale_y_continuous(labels = scales::percent_format(accuracy = 0.1)) +
facet_wrap(~Partei, scales = scales_value)
}
# Daten abrufen
url <- "https://www.wahlrecht.de/news/2025/bundestagswahl-2025.html"
data_long <- prepare_data_long(url)
final <- tibble(
Zeit = as.POSIXct("2025-02-24 01:51"),
Partei = c("SPD", "CDU/CSU", "Grüne", "FDP", "AfD", "DIE LINKE", "BSW", "Sonstige"),
Stimmanteil = c(16.4, 28.6, 11.6, 4.3, 20.8, 8.8, 4.9, 4.7) / 100
)
final$Institut <- "ARD"
final2 <- final
final2$Institut <- "ZDF"
data_long <- bind_rows(data_long, final, final2)
# Plots generieren
plot_results(data_long, "ARD")
plot_results(data_long, "ARD", change_from_first = TRUE)
plot_results(data_long, "ZDF")
plot_results(data_long, "ZDF", change_from_first = TRUE)
library(tidyverse)
library(rvest)
sf <- 2
width <- 600 * sf
height <- 335 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
# Farbzuordnung der Parteien
party_colors <- c(
"CDU/CSU" = "#000000",
"SPD" = "#E3000F",
"GRÜNE" = "#64BC5C",
"FDP" = "#FFCC00",
"DIE LINKE" = "#BE3075",
"AfD" = "#0C1C8C",
"BSW" = "#792350",
"Sonstige" = "#444444"
)
url <- "https://www.wahlrecht.de/ergebnisse/bundestag.htm"
webpage <- read_html(url)
tables <- html_table(webpage, fill = TRUE)
df <- tables[[2]]
names(df) <- c("Partei", tail(names(df), -1))
df <- df |>
select(c(1, which(grepl("%", df[1, ])))) |>
slice(-c(1, 2)) |>
pivot_longer(-Partei, names_to = "year") |>
mutate(
value = as.double(gsub(",", ".", value)) / 100,
year = ifelse(year == "2025*", "2025", year),
Partei = ifelse(Partei == "DIE LINKE²", "DIE LINKE", Partei)
)
df |>
filter(Partei %in% c("SPD", "CDU/CSU", "FDP", "GRÜNE", "DIE LINKE", "AfD")) |>
ggplot(aes(x = as.numeric(year), y = value, group = Partei, color = Partei)) +
labs(
title = "Ergebnis der Bundestagswahlen",
x = "",
y = "Stimmenanteil",
color = "Partei"
) +
geom_line() +
geom_point() +
theme_minimal() +
theme(legend.position = "right") +
scale_color_manual(values = party_colors) +
scale_y_continuous(labels = scales::percent_format())
# Groko
df |>
filter(Partei %in% c("SPD", "CDU/CSU")) |>
group_by(year) |>
summarize(value = sum(value)) |>
ggplot(aes(x = as.numeric(year), y = value)) +
labs(
title = "Ergebnis der \"GroKo\" (CDU/CSU & SPD) bei Bundestagswahlen",
x = "",
y = "Stimmenanteil",
color = "Partei"
) +
geom_line() +
coord_cartesian(ylim = c(0, 1)) +
geom_point() +
theme_minimal() +
theme(legend.position = "right") +
scale_color_manual(values = party_colors) +
scale_y_continuous(labels = scales::percent_format())
links <- c("SPD", "GRÜNE", "DIE LINKE", "KPD/DKP³")
rechts <- c("CDU/CSU", "FDP", "AfD", "GB/BHE")
subtitle_text <- paste0(
"Links: ", paste(links, collapse = ", "), "\n",
"Rechts: ", paste(rechts, collapse = ", ")
)
df |>
mutate(Block = case_when(
Partei %in% links ~ "links",
Partei %in% rechts ~ "rechts"
)) |>
filter(!is.na(Block)) |>
group_by(year, Block) |>
summarize(value = sum(value, na.rm = TRUE)) |>
ggplot(aes(x = as.numeric(year), y = value, group = Block, color = Block)) +
labs(
title = "Bundestagswahlen \"Links\" vs \"Rechts\"",
subtitle = subtitle_text,
x = "",
y = "Stimmenanteil",
color = "Block"
) +
geom_line() +
coord_cartesian(ylim = c(0, 1)) +
geom_point() +
theme_minimal() +
theme(legend.position = "bottom") +
scale_color_manual(values = c(
"rechts" = "#000000",
"links" = "#E3000F"
)) +
scale_y_continuous(labels = scales::percent_format())
@USMortality
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment