Created
August 31, 2017 14:48
-
-
Save daob/448c7872297160168d082bc232b8adcb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(lubridate) | |
library(rjson) | |
library(tidyverse) | |
library(ISOcodes) | |
library(ggplot2) | |
library(ggthemes) | |
# ---- Load ESS round 7 and recode the value items ----

# All relative paths below (input .dta, output PDF) resolve against this folder.
setwd("~/Dropbox/ERC/templates/")
ess7 <- haven::read_dta("ESS7e02_1.dta")

# ISO 639-2 language table, keyed by the three-letter bibliographic code so
# that rows can be looked up by name further down.
data("ISO_639_2")
rownames(ISO_639_2) <- ISO_639_2$Alpha_3_B

# Keep the id, country, design weight, home language and the block of value
# items running from ipcrtiv to impfun.
ess7_values <- dplyr::select(
  ess7,
  idno, cntry, dweight, lnghom1, ipcrtiv:impfun
)

# Every column after the first four: answers above 6 become NA
# (presumably the ESS refusal / don't know / no answer codes -- confirm
# against the codebook).
ess7_values <- dplyr::mutate_at(
  ess7_values,
  -(1:4),
  function(item) ifelse(item > 6, NA, item)
)

# Home language: "777"/"888"/"999" are treated as missing, and Swiss German
# (GSW) is folded into German (GER).
ess7_values <- dplyr::mutate(
  ess7_values,
  language_home = ifelse(lnghom1 %in% c("777", "888", "999"), NA, lnghom1),
  language_home = ifelse(language_home == "GSW", "GER", language_home)
)

# Look up the two-letter code and the language name via the lower-cased
# three-letter code; a missing language yields NA in both columns.
ess7_values[["language_home_iso2"]] <-
  ISO_639_2[tolower(ess7_values[["language_home"]]), "Alpha_2"]
ess7_values[["language_home_name"]] <-
  ISO_639_2[tolower(ess7_values[["language_home"]]), "Name"]
# Filter out small-time languages: keep only home languages that have a
# two-letter ISO code and more than 200 respondents.
ess7_languages <- ess7_values %>%
  dplyr::group_by(language_home_iso2) %>%
  dplyr::summarise(n = dplyr::n()) %>%
  # Respondents without a usable language code form their own NA group.
  # That group is not a language: without this guard it survives the size
  # filter, `%in%` below keeps its rows, and the NA code is later formatted
  # into a Wikipedia URL.
  dplyr::filter(!is.na(language_home_iso2), n > 200)

ess7_values <- ess7_values %>%
  dplyr::filter(language_home_iso2 %in% ess7_languages$language_home_iso2)

# Show languages and counts in data (an NA row would appear if any remained)
table(ess7_values$language_home_iso2, useNA = "ifany")
# table(ess7_values$language_home_iso2, useNA = "ifany") %>% sort %>% barplot
summary(ess7_values)
# One row per home language holding the plain (unweighted) mean of every
# numeric column, ignoring NAs. Note that this averages idno and dweight like
# any other numeric column; dweight is not applied as a weight.
ess7_values_means <- dplyr::summarise_if(
  dplyr::group_by(ess7_values, language_home_iso2),
  is.numeric,
  mean,
  na.rm = TRUE
)
#' Daily aggregate pageviews for one Wikipedia language edition
#'
#' Queries the Wikimedia pageviews REST API (aggregate endpoint, all access
#' methods, all agents, daily granularity) and returns the response items as
#' one data frame.
#'
#' @param lang_iso2 Language prefix of the Wikipedia edition, e.g. `"en"`.
#' @param start,end First and last day of the range as `"YYYYMMDD"` strings.
#'   The defaults reproduce the range that used to be hard-coded.
#' @return A data frame with one row per returned item and an added `date`
#'   column parsed from `timestamp`. Errors (network failure, unknown
#'   project) propagate to the caller.
get_counts_perproject <- function(lang_iso2,
                                  start = "20150701",
                                  end = "20160701") {
  url <- sprintf(
    paste0(
      "https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/",
      "%s.wikipedia.org/all-access/all-agents/daily/%s/%s"
    ),
    lang_iso2, start, end
  )

  rjson::fromJSON(file = url)$items %>%
    dplyr::bind_rows() %>%
    # Parse only the leading YYYYMMDD part; the API timestamp appears to
    # carry a trailing hour field (TODO confirm), which the format string
    # does not describe.
    dplyr::mutate(
      date = lubridate::as_date(substr(timestamp, 1, 8), format = "%Y%m%d")
    )
}
# Articles whose pageviews are compared. Order matters: downstream code joins
# the first against the second and labels them "high" (views.x) and "low"
# (views.y) respectively.
page_names <- c("William_Shakespeare", "Game_of_Thrones")

#' Daily pageviews of one article in one Wikipedia language edition
#'
#' Queries the Wikimedia pageviews REST API (per-article endpoint, all access
#' methods, all agents, daily granularity) and returns the response items as
#' one data frame.
#'
#' @param lang_iso2 Language prefix of the Wikipedia edition, e.g. `"en"`.
#' @param page_name Article title as used in the article URL (underscores
#'   instead of spaces). It is percent-encoded before being put in the path,
#'   so titles containing `/`, `?`, spaces or non-ASCII characters work;
#'   titles that are already percent-encoded are left as they are.
#' @param start,end First and last day of the range as `"YYYYMMDD"` strings.
#'   The defaults reproduce the range that used to be hard-coded.
#' @return A data frame with one row per returned item and an added `date`
#'   column parsed from `timestamp`. Errors (network failure, article not
#'   found) propagate to the caller.
get_counts_perpage <- function(lang_iso2,
                               page_name,
                               start = "20150701",
                               end = "20160701") {
  url <- sprintf(
    paste0(
      "https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/",
      "%s.wikipedia/all-access/all-agents/%s/daily/%s/%s"
    ),
    lang_iso2,
    utils::URLencode(page_name, reserved = TRUE),
    start,
    end
  )

  rjson::fromJSON(file = url)$items %>%
    dplyr::bind_rows() %>%
    # Parse only the leading YYYYMMDD part; the API timestamp appears to
    # carry a trailing hour field (TODO confirm), which the format string
    # does not describe.
    dplyr::mutate(
      date = lubridate::as_date(substr(timestamp, 1, 8), format = "%Y%m%d")
    )
}
# Fetch pageviews for every language in the ESS means table. Both lists are
# parallel to ess7_values_means$language_home_iso2; an entry is NULL when the
# language code is missing or when the request failed.

# Per-article views: for each language, a list with one data frame per entry
# of page_names. If any single article fails, the whole language is NULL.
pageviews_raw <- lapply(
  ess7_values_means$language_home_iso2,
  function(lang_iso2) {
    # A missing code would otherwise be formatted into the URL as the
    # literal text "NA".
    if (is.na(lang_iso2)) {
      return(NULL)
    }
    tryCatch(
      lapply(
        page_names,
        function(page_name) get_counts_perpage(lang_iso2, page_name)
      ),
      error = function(e) {
        # Still best-effort, but say which language was dropped and why.
        warning(
          "No per-page views for '", lang_iso2, "': ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )
  }
)

# Project-wide views: one data frame per language.
pageviews_project <- lapply(
  ess7_values_means$language_home_iso2,
  function(lang_iso2) {
    if (is.na(lang_iso2)) {
      return(NULL)
    }
    tryCatch(
      get_counts_perproject(lang_iso2),
      error = function(e) {
        warning(
          "No project views for '", lang_iso2, "': ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )
  }
)
# Build one long table of daily views across all languages.
# For each language, join the two per-article series on date (their columns
# get the .x / .y suffixes, first article = .x), then join the project-wide
# series, whose columns keep their plain names.
d <- lapply(seq_along(pageviews_raw), function(iproject) {
  per_page <- pageviews_raw[[iproject]]
  per_project <- pageviews_project[[iproject]]

  # Either fetch may have failed independently; joining against NULL would
  # raise an error, so skip the language unless both are available.
  if (is.null(per_page) || is.null(per_project)) {
    return(NULL)
  }

  dplyr::inner_join(per_page[[1]], per_page[[2]], by = c("date")) %>%
    dplyr::inner_join(per_project, by = c("date"))
})

# Stack everything in one call; NULL entries are ignored by bind_rows.
d <- dplyr::bind_rows(d)
# ---- Summarise Wikipedia interest per language and attach ESS means ----

# Rename the joined view counts: .x is the first article ("high"), .y the
# second ("low"), and the unsuffixed column is the whole project. Missing
# article counts are set to 0.
wiki_views <- dplyr::mutate(
  d,
  views_raw_high = ifelse(is.na(views.x), 0, views.x),
  views_raw_low = ifelse(is.na(views.y), 0, views.y),
  views_project = views
)

# Split the project identifier into its language prefix and the remainder,
# then keep only the language, the date and the three views_ columns.
wiki_views <- tidyr::separate(
  wiki_views,
  project,
  into = c("language", "project")
)
wiki_views <- dplyr::select(
  wiki_views,
  language, date, dplyr::starts_with("views_")
)

# Typical daily views per language. The columns are called *_avg but hold
# medians.
mean_interest <- dplyr::summarise(
  dplyr::group_by(wiki_views, language),
  views_raw_high_avg = median(views_raw_high),
  views_raw_low_avg = median(views_raw_low),
  views_project_avg = median(views_project)
)

# Give the ESS means a matching key column and keep only languages that are
# present on both sides.
ess7_values_means[["language"]] <- ess7_values_means[["language_home_iso2"]]
interest_joined <- dplyr::inner_join(
  mean_interest,
  ess7_values_means,
  by = "language"
)
# ---- Plot: low- versus high-culture pageviews, coloured by creativity ----
# One label per language, positioned by its median daily views of the two
# articles (log scales), with a linear trend line per creativity group.
interest_joined %>%
  mutate(`Important to be creative` = cut(
    ipcrtiv,
    quantile(ipcrtiv, probs = c(0, 0.33, 0.5, 0.67, 1)),
    # The lowest break is the minimum itself; cut() intervals are open on
    # the left by default, so without this the language with the smallest
    # ipcrtiv would get NA and lose its colour group.
    include.lowest = TRUE
  )) %>%
  ggplot(aes(views_raw_low_avg, views_raw_high_avg, label = language,
             colour = `Important to be creative`,
             group = `Important to be creative`)) +
  ggplot2::scale_x_log10() + ggplot2::scale_y_log10() +
  ggplot2::xlab("Wikipedia pageviews: 'Game of Thrones'") +
  ggplot2::ylab("Wikipedia pageviews: 'William Shakespeare'") +
  geom_smooth(method = "lm", se = FALSE) +
  geom_text() +
  scale_colour_brewer(palette = "RdYlBu") +
  ggplot2::ggtitle("Interest in Low versus high culture on Wikipedia\nis related to Schwarz human values") +
  theme_minimal(base_family = "Helvetica") +
  theme(panel.border = element_rect(colour = "#00000055", fill = NA, size = 0.25))

# Output size in inches. The aspect ratio is named golden_ratio but is set to
# 1.7 (the golden ratio itself is about 1.618).
base_width <- 6.5
golden_ratio <- 1.7
# No plot is passed, so ggsave writes the most recently created ggplot,
# i.e. the one built above.
ggsave("low_vs_high.pdf", width = base_width, height = base_width / golden_ratio)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment