Last active
March 10, 2025 14:53
-
-
Save USMortality/c8cfc12452fa060c4f1a0cd7f002234e to your computer and use it in GitHub Desktop.
Olympic Medals (2024) [World]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest) | |
library(dplyr) | |
library(ggpubr) | |
library(readxl) | |
library(scales) | |
sf <- 2 | |
width <- 1440 * sf | |
height <- 960 * sf | |
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf)) | |
# Medals | |
medals <- read_html( | |
"https://en.wikipedia.org/wiki/2024_Summer_Olympics_medal_table" | |
) %>% | |
html_nodes("table") %>% | |
.[[4]] %>% | |
html_table() %>% | |
select(2, 6) %>% | |
setNames(c("country", "medals")) |> | |
mutate( | |
country = gsub("\\*", "", country), | |
medals = as.integer(medals) | |
) | |
download.file( | |
paste0( | |
"https://population.un.org/wpp/assets/Excel%20Files/1_Indicator%20", | |
"(Standard)/EXCEL_FILES/1_General/", | |
"WPP2024_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT.xlsx" | |
), | |
"/tmp/un_pop.xlsx" | |
) | |
# Population | |
pop <- read_excel( | |
"/tmp/un_pop.xlsx", | |
sheet = "Estimates", range = "C17:R22000" | |
) |> | |
select(1, 9, 10, 16) |> | |
setNames(c("country", "year", "population", "median_age")) |> | |
mutate( | |
year = as.numeric(year), | |
population = as.numeric(population), | |
median_age = as.numeric(median_age) | |
) |> | |
group_by(country) |> | |
filter(year == max(year)) |> | |
ungroup() | |
pop$country[pop$country == "United States of America"] <- "United States" | |
pop$country[pop$country == "United Kingdom"] <- "Great Britain" | |
pop$country[pop$country == "Republic of Korea"] <- "South Korea" | |
pop$country[pop$country == "Iran (Islamic Republic of)"] <- "Iran" | |
pop$country[pop$country == "Czechia"] <- "Czech Republic" | |
pop$country[pop$country == "China, Taiwan Province of China"] <- "Chinese Taipei" | |
pop$country[pop$country == "China, Hong Kong SAR"] <- "Hong Kong" | |
pop$country[pop$country == "Dem. People's Republic of Korea"] <- "North Korea" | |
pop$country[pop$country == "Republic of Moldova"] <- "Moldova" | |
pop$country[pop$country == "Kosovo (under UNSC res. 1244)"] <- "Kosovo" | |
pop$country[pop$country == "Cabo Verde"] <- "Cape Verde" | |
pop$country[pop$country == "Côte d'Ivoire"] <- "Ivory Coast" | |
# GDP | |
gdp <- read_html( | |
"https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(PPP)_per_capita" | |
) %>% | |
html_nodes("table") %>% | |
.[[2]] %>% | |
html_table() %>% | |
select(1, 2) %>% | |
setNames(c("country", "gdp")) %>% | |
mutate( | |
gdp = as.numeric(gsub(",", "", gdp)), | |
country = gsub(" \\*", "", country) | |
) %>% | |
filter(!is.na(gdp)) | |
df <- pop |> | |
inner_join(medals, by = join_by(country)) |> | |
inner_join(gdp, by = join_by(country)) |> | |
select(-year) |> | |
mutate( | |
population = population / 1000, | |
pop_per_medal = population / medals, | |
pop_per_medal_adj = pop_per_medal * (pop[1, ]$median_age / median_age) | |
) | |
# Population per Medal vs Population | |
chart <- ggscatter( | |
df, | |
x = "pop_per_medal", | |
y = "population", | |
label = "country", | |
add = "reg.line", # Add regression line | |
add.params = list(color = "blue", fill = "lightgray"), | |
conf.int = TRUE # Add confidence interval | |
) + | |
scale_y_log10(labels = label_number(accuracy = 1)) + | |
scale_x_log10(labels = label_number(accuracy = 1)) + | |
stat_cor(color = "red", method = "pearson") + | |
labs( | |
title = "Olympics 2024: Population per Medal vs Population", | |
y = "Population (millions) [log]", | |
x = "Population per Medal (millions) [log]" | |
) | |
ggplot2::ggsave( | |
filename = "chart1.png", plot = chart, width = width, height = height, | |
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo") | |
) | |
# Medals per Median Age | |
chart <- ggscatter( | |
df, | |
x = "pop_per_medal", | |
y = "median_age", | |
label = "country", | |
add = "reg.line", # Add regression line | |
add.params = list(color = "blue", fill = "lightgray"), | |
conf.int = TRUE # Add confidence interval | |
) + | |
scale_x_log10(labels = label_number(accuracy = 1)) + | |
stat_cor(color = "red", method = "pearson") + | |
labs( | |
title = "Olympics 2024: Population per Medal vs Median Age", | |
y = "Median Age", | |
x = "Population per Medal (Log)" | |
) | |
ggplot2::ggsave( | |
filename = "chart2.png", plot = chart, width = width, height = height, | |
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo") | |
) | |
# Medals per GDP per Capita (PPP) | |
chart <- ggscatter( | |
df, | |
x = "pop_per_medal", | |
y = "gdp", | |
label = "country", | |
add = "reg.line", # Add regression line | |
add.params = list(color = "blue", fill = "lightgray"), | |
conf.int = TRUE # Add confidence interval | |
) + | |
scale_y_log10(labels = label_number(accuracy = 1)) + | |
scale_x_log10(labels = label_number(accuracy = 1)) + | |
stat_cor(color = "red", method = "pearson") + | |
labs( | |
title = "Olympics 2024: Medals vs GDP (PPP) per capita", | |
y = "GDP (PPP) per capita", | |
x = "Population per Medal (Log)" | |
) | |
ggplot2::ggsave( | |
filename = "chart3.png", plot = chart, width = width, height = height, | |
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo") | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://www.mortality.watch/charts/list.html#olympic-medals-2024-world