Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active March 10, 2025 14:53
Show Gist options
  • Save USMortality/c8cfc12452fa060c4f1a0cd7f002234e to your computer and use it in GitHub Desktop.
Save USMortality/c8cfc12452fa060c4f1a0cd7f002234e to your computer and use it in GitHub Desktop.
Olympic Medals (2024) [World]
library(rvest)
library(dplyr)
library(ggpubr)
library(readxl)
library(scales)
sf <- 2
width <- 1440 * sf
height <- 960 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
# Medals
medals <- read_html(
"https://en.wikipedia.org/wiki/2024_Summer_Olympics_medal_table"
) %>%
html_nodes("table") %>%
.[[4]] %>%
html_table() %>%
select(2, 6) %>%
setNames(c("country", "medals")) |>
mutate(
country = gsub("\\*", "", country),
medals = as.integer(medals)
)
download.file(
paste0(
"https://population.un.org/wpp/assets/Excel%20Files/1_Indicator%20",
"(Standard)/EXCEL_FILES/1_General/",
"WPP2024_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT.xlsx"
),
"/tmp/un_pop.xlsx"
)
# Population
pop <- read_excel(
"/tmp/un_pop.xlsx",
sheet = "Estimates", range = "C17:R22000"
) |>
select(1, 9, 10, 16) |>
setNames(c("country", "year", "population", "median_age")) |>
mutate(
year = as.numeric(year),
population = as.numeric(population),
median_age = as.numeric(median_age)
) |>
group_by(country) |>
filter(year == max(year)) |>
ungroup()
pop$country[pop$country == "United States of America"] <- "United States"
pop$country[pop$country == "United Kingdom"] <- "Great Britain"
pop$country[pop$country == "Republic of Korea"] <- "South Korea"
pop$country[pop$country == "Iran (Islamic Republic of)"] <- "Iran"
pop$country[pop$country == "Czechia"] <- "Czech Republic"
pop$country[pop$country == "China, Taiwan Province of China"] <- "Chinese Taipei"
pop$country[pop$country == "China, Hong Kong SAR"] <- "Hong Kong"
pop$country[pop$country == "Dem. People's Republic of Korea"] <- "North Korea"
pop$country[pop$country == "Republic of Moldova"] <- "Moldova"
pop$country[pop$country == "Kosovo (under UNSC res. 1244)"] <- "Kosovo"
pop$country[pop$country == "Cabo Verde"] <- "Cape Verde"
pop$country[pop$country == "Côte d'Ivoire"] <- "Ivory Coast"
# GDP
gdp <- read_html(
"https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(PPP)_per_capita"
) %>%
html_nodes("table") %>%
.[[2]] %>%
html_table() %>%
select(1, 2) %>%
setNames(c("country", "gdp")) %>%
mutate(
gdp = as.numeric(gsub(",", "", gdp)),
country = gsub(" \\*", "", country)
) %>%
filter(!is.na(gdp))
df <- pop |>
inner_join(medals, by = join_by(country)) |>
inner_join(gdp, by = join_by(country)) |>
select(-year) |>
mutate(
population = population / 1000,
pop_per_medal = population / medals,
pop_per_medal_adj = pop_per_medal * (pop[1, ]$median_age / median_age)
)
# Population per Medal vs Population
chart <- ggscatter(
df,
x = "pop_per_medal",
y = "population",
label = "country",
add = "reg.line", # Add regression line
add.params = list(color = "blue", fill = "lightgray"),
conf.int = TRUE # Add confidence interval
) +
scale_y_log10(labels = label_number(accuracy = 1)) +
scale_x_log10(labels = label_number(accuracy = 1)) +
stat_cor(color = "red", method = "pearson") +
labs(
title = "Olympics 2024: Population per Medal vs Population",
y = "Population (millions) [log]",
x = "Population per Medal (millions) [log]"
)
ggplot2::ggsave(
filename = "chart1.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
# Medals per Median Age
chart <- ggscatter(
df,
x = "pop_per_medal",
y = "median_age",
label = "country",
add = "reg.line", # Add regression line
add.params = list(color = "blue", fill = "lightgray"),
conf.int = TRUE # Add confidence interval
) +
scale_x_log10(labels = label_number(accuracy = 1)) +
stat_cor(color = "red", method = "pearson") +
labs(
title = "Olympics 2024: Population per Medal vs Median Age",
y = "Median Age",
x = "Population per Medal (Log)"
)
ggplot2::ggsave(
filename = "chart2.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
# Medals per GDP per Capita (PPP)
chart <- ggscatter(
df,
x = "pop_per_medal",
y = "gdp",
label = "country",
add = "reg.line", # Add regression line
add.params = list(color = "blue", fill = "lightgray"),
conf.int = TRUE # Add confidence interval
) +
scale_y_log10(labels = label_number(accuracy = 1)) +
scale_x_log10(labels = label_number(accuracy = 1)) +
stat_cor(color = "red", method = "pearson") +
labs(
title = "Olympics 2024: Medals vs GDP (PPP) per capita",
y = "GDP (PPP) per capita",
x = "Population per Medal (Log)"
)
ggplot2::ggsave(
filename = "chart3.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment