Last active
October 25, 2021 18:08
-
-
Save agoldst/d92ffa634aa6ddbf1c72b2401b21cf11 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
title: "Nobel genre tallies"
output:
  html_document:
    self_contained: false
...
```{r setup, include=FALSE}
# Packages: tidyverse for data wrangling and plotting, rvest for scraping
# the laureate table, knitr for chunk options.
library(tidyverse)
library(rvest)
library(knitr)

# Hide chunk source in the rendered report; only output is shown.
opts_chunk$set(echo = FALSE)
```
```{r constants}
# Source of the laureate table. The same data is available as JSON via
#   <http://api.nobelprize.org/2.1/nobelPrizes?nobelPrizeCategory=lit> and
#   <http://api.nobelprize.org/2.1/laureates?nobelPrizeCategory=lit>
# but Wikipedia's table is easier to scrape.
url <- "https://en.wikipedia.org/wiki/List_of_Nobel_laureates_in_Literature"

# Local TSV cache of the scraped table. The page is only downloaded when
# this file does not exist yet.
data_file <- "nobel.tsv"
```
```{r prize-retrieval}
# Scrape the laureate table once and cache it as TSV; when the cache file
# already exists the network is not touched.
if (!file.exists(data_file)) {
  list_page <- read_html(url)
  # first table is the big list
  list_page %>%
    html_node("table") %>%
    html_table(fill = TRUE) %>%
    # Drop the portrait column when present. any_of() keeps the scrape from
    # aborting if the page no longer has a column with this name.
    select(-any_of("Picture")) %>%
    write_tsv(data_file)
}
```
```{r load-prize}
# Load the cached table and give the two columns used downstream short,
# syntactic names (`Genre(s)` otherwise needs backticks everywhere).
laureates <- read_tsv(data_file) %>%
  rename(year = Year, genre = `Genre(s)`)
```
```{r genre-tally}
# Wide table: one row per year, one column per recoded genre, each cell
# holding how many of that year's laureates worked in that genre.
genres <- laureates %>%
  # Tag every table row so the de-duplication below stays within a single
  # laureate. NOTE(review): assumes one table row per laureate -- confirm
  # against the scraped data.
  mutate(laureate_id = row_number()) %>%
  select(laureate_id, year, genre) %>%
  # The genre cell is a comma-separated list; give each genre its own row.
  mutate(genre = str_split(genre, ", ")) %>%
  unnest(genre) %>%
  filter(genre != "Not awarded") %>%
  # recode genre
  mutate(genre = case_when(
    genre %in% c("novel", "short story") ~ "fiction",
    genre %in% c(
      "memoirs", "biography", "autobiography",
      "philosophy", "literary criticism", "philology",
      "history", "law", "essay"
    ) ~ "non-fiction",
    genre %in% c("music", "songwriting") ~ "music",
    TRUE ~ genre
  )) %>%
  # Deduplicate per laureate: someone listed under several genres that were
  # recoded to the same label counts once for that label. laureate_id is
  # part of the key so that two laureates sharing a year and a genre are
  # NOT merged into one.
  distinct(laureate_id, year, genre) %>%
  select(-laureate_id) %>%
  mutate(count = 1) %>%
  # values_fn = sum adds up laureates who share a year and a genre.
  pivot_wider(
    names_from = "genre", values_from = "count",
    values_fill = 0, values_fn = sum
  )
```
```{r genre-cum-plot}
# Running total per genre in long form (year, genre, count) for plotting.
genres_cum <- genres %>%
  # cumsum() depends on row order, so make chronological order explicit
  # rather than relying on the order of the scraped table.
  arrange(year) %>%
  mutate(across(!year, cumsum)) %>%
  pivot_longer(-year, names_to = "genre", values_to = "count") %>%
  # Order the genre levels by their count at the latest year.
  mutate(genre = fct_reorder2(genre, year, count))

# Stacked area chart of each genre's share of the running total.
genres_cum %>%
  # Rescale each year's running totals so they sum to 1 within the year.
  group_by(year) %>%
  mutate(count = count / sum(count)) %>%
  ungroup() %>%
  ggplot(aes(year, count, fill = genre)) +
  geom_area(position = "stack") +
  scale_x_continuous(breaks = seq(1910, 2020, by = 10)) +
  scale_y_continuous(labels = function(x) str_c(x * 100, "%")) +
  scale_fill_viridis_d() +
  coord_cartesian(expand = FALSE) +
  labs(
    y = "cumulative proportion of laureates' genres",
    title = "Genres' cumulative share of the literature Nobel, 1901–2021",
    caption = str_wrap(
      "Data from https://en.wikipedia.org/wiki/List_of_Nobel_laureates_in_Literature. The height of each strip represents the cumulative proportion of laureates up to that date who worked in that genre. N.B. when a writer worked in multiple genres, that writer's prize is counted multiple times."
    )
  )
```
```{r genre-cum-facet}
# Absolute cumulative counts, one panel per genre, restricted to the four
# genres named in the filter below.
genres_cum %>%
  filter(genre %in% c("fiction", "poetry", "non-fiction", "drama")) %>%
  ggplot(aes(year, count, fill = genre)) +
  geom_area() +
  facet_wrap(~genre) +
  # The panel strips already name the genre, so the fill legend is dropped.
  scale_fill_viridis_d(guide = "none") +
  coord_cartesian(expand = FALSE) +
  labs(
    y = "cumulative count of laureates' genres",
    title = "Cumulative tally of literature Nobel laureates' genres",
    caption = "Data as above, for the four most numerous genres only."
  )
```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment