Skip to content

Instantly share code, notes, and snippets.

@mschnetzer
Last active May 21, 2019 15:46
Show Gist options
  • Save mschnetzer/9b80f92fe2fc1df9cb8ad4a0c65537e3 to your computer and use it in GitHub Desktop.
Save mschnetzer/9b80f92fe2fc1df9cb8ad4a0c65537e3 to your computer and use it in GitHub Desktop.
Textanalyse der Pressekonferenzen Kurz und Strache zum #ibizagate (https://twitter.com/matschnetzer/status/1130862027696742406)
library(tidytext)
library(tm)
library(tidyverse)
library(ggforce)
library(msthemes)
library(rvest)
# Download the two press-conference transcripts and keep the text of every
# node matching the ".antwort" CSS selector. Each result is a character
# vector with one element per matched node.
strache <- html_text(
  html_nodes(
    read_html("https://neuwal.com/transkript/20190518-pressekonferenz-heinz-christian-strache.php"),
    ".antwort"
  )
)

kurz <- html_text(
  html_nodes(
    read_html("https://neuwal.com/transkript/20190518-pressekonferenz-sebastian-kurz.php"),
    ".antwort"
  )
)
# Tokenise both transcripts into single words, drop German stop words, count
# how often each remaining word occurs per speaker, and join the two counts.

# One row per scraped text node; `line` numbers the nodes. seq_along() is used
# instead of a hard-coded range so the tibbles are built correctly however
# many nodes the scrape returned (tibble() only recycles length-1 vectors, so
# a fixed `1:2` errors whenever `strache` does not have exactly two elements).
kurz_df <- tibble(line = seq_along(kurz), text = kurz)
strache_df <- tibble(line = seq_along(strache), text = strache)

# German stop-word list, extended by hand with "dass".
stopword <- tibble(word = c(stopwords("de"), "dass"))

# One row per word, minus the stop words. The join key is spelled out so the
# join does not depend on (or print a message about) implicit column matching.
kurz_cl <- kurz_df %>%
  unnest_tokens(word, text) %>%
  anti_join(stopword, by = "word")
strache_cl <- strache_df %>%
  unnest_tokens(word, text) %>%
  anti_join(stopword, by = "word")

# Absolute word frequencies, most frequent first.
# (For relative frequencies in percent, append
#  `%>% mutate(n = n / sum(n) * 100)` to each pipeline.)
kurz_count <- kurz_cl %>% count(word, sort = TRUE)
strache_count <- strache_cl %>% count(word, sort = TRUE)

# Show the words that occur in both statements.
intersect(kurz_cl$word, strache_cl$word)

# Keep only the shared words. After the join, `n.x` is the count in Kurz's
# statement and `n.y` the count in Strache's statement.
df <- inner_join(kurz_count, strache_count, by = "word")
# Scatter plot of the words used by both speakers: x is the word count in
# Kurz's statement (`n.x`), y the count in Strache's statement (`n.y`).
# Three words are highlighted in red and annotated; all other words are drawn
# as jittered, translucent grey points.

# Words that get a red point and an annotation.
highlight_words <- c("silberstein", "land", "fpö")
# Font sizes passed to `label.fontsize` of every annotation.
label_sizes <- c(11, 8)

straku_plot <- ggplot(df, aes(n.x, n.y)) +
  geom_jitter(
    data = df %>% filter(!word %in% highlight_words),
    color = "grey20", size = 4, alpha = 0.2
  ) +
  geom_point(
    data = df %>% filter(word %in% highlight_words),
    color = "red", size = 4, alpha = 0.8
  ) +
  geom_mark_circle(
    aes(
      filter = word == "silberstein", label = "Silberstein",
      description = "Silberstein kommt in Straches Rede 2 Mal vor, die FPÖ nur 1 Mal"
    ),
    label.buffer = unit(27, "mm"),
    label.fontsize = label_sizes
  ) +
  geom_mark_circle(
    aes(
      filter = word == "land", label = "Land",
      description = 'Kurz spricht sehr oft von "unserem Land"'
    ),
    label.fontsize = label_sizes
  ) +
  geom_mark_circle(
    aes(
      filter = word == "fpö", label = "FPÖ",
      description = "Kurz nennt 3 Mal die FPÖ, Strache nur 1 Mal"
    ),
    label.buffer = unit(15, "mm"),
    label.fontsize = label_sizes
  ) +
  scale_y_continuous(limits = c(-0.2, 6)) +
  scale_x_continuous(limits = c(-0.2, 13), breaks = seq(0, 14, 2)) +
  # Axis tick labels are hidden below, so a small caption plus two arrows
  # near the origin indicate the direction of increasing frequency.
  ggplot2::annotate(
    "text",
    x = -0.1, y = 0, label = "Steigende Häufigkeit",
    size = 3, hjust = 0, alpha = 0.7
  ) +
  # NOTE(review): `size` on line geoms still works but ggplot2 >= 3.4.0
  # prefers `linewidth`; left as is so the script also runs on older versions.
  geom_segment(
    aes(x = 2, xend = 4, y = 0, yend = 0),
    size = 0.3, color = "grey20",
    arrow = arrow(length = unit(0.1, "cm"))
  ) +
  geom_segment(
    aes(x = 0, xend = 0, y = 0.2, yend = 2),
    size = 0.3, alpha = 0.2,
    arrow = arrow(length = unit(0.1, "cm"))
  ) +
  theme_ms() +
  labs(
    x = "Sebastian Kurz", y = "Heinz-Christian Strache",
    title = "Pressekonferenzen Kurz und Strache zum #ibizagate",
    subtitle = "Worte, die in beiden Statements vom 18. Mai 2019 vorkommen",
    caption = "Quelle: neuwal.com - Grafik: @matschnetzer"
  ) +
  theme(axis.text = element_blank())

# Save the plot explicitly. ggsave() must not be chained with `+`: it is not a
# plot component, and since ggplot2 3.3.4 it returns the file name, so
# `plot + ggsave(...)` stops with "Can't add ... to a ggplot object".
ggsave("straku.png", plot = straku_plot, dpi = 300)

# Display the plot when the script is run at top level.
straku_plot
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment