Last active
May 21, 2019 15:46
-
-
Save mschnetzer/9b80f92fe2fc1df9cb8ad4a0c65537e3 to your computer and use it in GitHub Desktop.
Textanalyse der Pressekonferenzen Kurz und Strache zum #ibizagate (https://twitter.com/matschnetzer/status/1130862027696742406)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidytext)
library(tm)
library(tidyverse)
library(ggforce)
library(msthemes)
library(rvest)
# Download a transcript page and return the text of every node with the
# CSS class ".antwort" as a character vector (one element per node).
read_answers <- function(url) {
  read_html(url) %>%
    html_nodes(".antwort") %>%
    html_text()
}

# Raw statement texts of the two press conferences (requires network access).
strache <- read_answers(
  "https://neuwal.com/transkript/20190518-pressekonferenz-heinz-christian-strache.php"
)
kurz <- read_answers(
  "https://neuwal.com/transkript/20190518-pressekonferenz-sebastian-kurz.php"
)
# One row per scraped text chunk; `line` numbers the chunks.
# seq_along() adapts to however many ".antwort" nodes the page returned,
# instead of assuming exactly one (Kurz) or two (Strache).
kurz_df <- tibble(line = seq_along(kurz), text = kurz)
strache_df <- tibble(line = seq_along(strache), text = strache)
# German stop words shipped with tm, with "dass" added explicitly.
stopword <- tibble(word = c(stopwords("de"), "dass"))

# Split each statement into one word per row and drop the stop words.
# The join key is spelled out so the join does not rely on column-name
# guessing (and does not emit the "Joining, by = ..." message).
kurz_cl <- kurz_df %>%
  unnest_tokens(word, text) %>%
  anti_join(stopword, by = "word")
strache_cl <- strache_df %>%
  unnest_tokens(word, text) %>%
  anti_join(stopword, by = "word")

# Absolute word counts per speaker, most frequent first.
# To work with percentages instead, append to each pipeline:
#   %>% mutate(n = n / sum(n) * 100)
kurz_count <- kurz_cl %>% count(word, sort = TRUE)
strache_count <- strache_cl %>% count(word, sort = TRUE)
# Exploratory check: words used in both statements (auto-printed when the
# script is run at top level; the result is not stored).
intersect(kurz_cl$word, strache_cl$word)

# Keep only the words both speakers used.
# After the join, n.x is the count for Kurz and n.y the count for Strache.
df <- inner_join(kurz_count, strache_count, by = "word")
# Words that get a red point and an annotated circle in the plot.
highlight_words <- c("silberstein", "land", "fpö")

# Scatter plot of the shared words:
# x = count in Kurz's statement (n.x), y = count in Strache's statement (n.y).
p <- ggplot(df, aes(n.x, n.y)) +
  # Non-highlighted words are jittered so equal counts do not fully overlap.
  geom_jitter(
    data = df %>% filter(!word %in% highlight_words),
    color = "grey20", size = 4, alpha = 0.2
  ) +
  # Highlighted words are drawn at their exact position.
  geom_point(
    data = df %>% filter(word %in% highlight_words),
    color = "red", size = 4, alpha = 0.8
  ) +
  geom_mark_circle(
    aes(
      filter = word == "silberstein", label = "Silberstein",
      description = "Silberstein kommt in Straches Rede 2 Mal vor, die FPÖ nur 1 Mal"
    ),
    label.buffer = unit(27, "mm"),
    label.fontsize = c(11, 8)
  ) +
  geom_mark_circle(
    aes(
      filter = word == "land", label = "Land",
      description = 'Kurz spricht sehr oft von "unserem Land"'
    ),
    label.fontsize = c(11, 8)
  ) +
  geom_mark_circle(
    aes(
      filter = word == "fpö", label = "FPÖ",
      description = "Kurz nennt 3 Mal die FPÖ, Strache nur 1 Mal"
    ),
    label.buffer = unit(15, "mm"),
    label.fontsize = c(11, 8)
  ) +
  # Limits start slightly below zero to leave room for the hand-drawn
  # "increasing frequency" caption and arrows near the origin.
  scale_y_continuous(limits = c(-0.2, 6)) +
  scale_x_continuous(limits = c(-0.2, 13), breaks = seq(0, 14, 2)) +
  # ggplot2:: prefix avoids picking up an annotate() from another attached
  # package.
  ggplot2::annotate(
    "text", x = -0.1, y = 0, label = "Steigende Häufigkeit",
    size = 3, hjust = 0, alpha = 0.7
  ) +
  # NOTE(review): constants inside aes() draw one segment per row of `df`,
  # so the arrows are overplotted and alpha = 0.2 looks darker than written.
  # Kept as is to preserve the published look; annotate("segment", ...)
  # would draw each arrow once.
  geom_segment(
    aes(x = 2, xend = 4, y = 0, yend = 0),
    size = 0.3, color = "grey20",
    arrow = arrow(length = unit(0.1, "cm"))
  ) +
  geom_segment(
    aes(x = 0, xend = 0, y = 0.2, yend = 2),
    size = 0.3, alpha = 0.2,
    arrow = arrow(length = unit(0.1, "cm"))
  ) +
  theme_ms() +
  labs(
    x = "Sebastian Kurz", y = "Heinz-Christian Strache",
    title = "Pressekonferenzen Kurz und Strache zum #ibizagate",
    subtitle = "Worte, die in beiden Statements vom 18. Mai 2019 vorkommen",
    caption = "Quelle: neuwal.com - Grafik: @matschnetzer"
  ) +
  # Only the ordering of counts matters here, so the axis numbers are hidden.
  theme(axis.text = element_blank())

# Save the plot object explicitly. The former `plot + ggsave(...)` idiom was
# an unofficial hack that fails on ggplot2 >= 3.3.4, where ggsave() returns
# the file path and that value cannot be added to a plot.
ggsave("straku.png", plot = p, dpi = 300)

# Display the plot (auto-printed when run at top level, as before).
p
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment