Skip to content

Instantly share code, notes, and snippets.

@Ryo-N7
Last active June 14, 2019 17:02
Show Gist options
  • Save Ryo-N7/a1bf88bae3b8d7b810d1f19d0157fcbe to your computer and use it in GitHub Desktop.
Save Ryo-N7/a1bf88bae3b8d7b810d1f19d0157fcbe to your computer and use it in GitHub Desktop.
Most successful teams of the Copa America!
## packages
library(dplyr)
library(tidyr)
library(purrr)
library(stringr)
library(rvest)
library(polite) ## github only
library(ggplot2)
library(scales)
library(glue)
library(extrafont)
loadfonts()
## webscrape
# Source page: Copa América statistics on Spanish-language Wikipedia.
url <- "https://es.wikipedia.org/wiki/Anexo:Estad%C3%ADsticas_de_la_Copa_Am%C3%A9rica"

# Set up a polite session for the page before fetching anything.
session <- bow(url)

# Fetch the page, pick the table node(s) matched by the CSS selector, parse
# them with html_table() and flatten the resulting list into one data frame.
# NOTE(review): the selector is positional (10th child of the content div);
# confirm it still points at the champions table if the page layout changes.
copa_campeones <- flatten_df(
  html_table(
    html_nodes(
      scrape(session),
      ".mw-parser-output > table:nth-child(10)"
    )
  )
)
## clean + tidy
# Reshape the scraped table into long format: one row per team and placement
# category ("Champions", "Runners-up", "Third Place"), with the number of
# times the team finished there in `value`.
copa_campeones_limpia <- copa_campeones %>%
  janitor::clean_names() %>%
  # Only the first eight rows and the first four columns are used.
  slice(1:8) %>%
  select(1:4) %>%
  set_names(c("team", "winners", "runners_up", "third_place")) %>%
  # Split every placement cell at its first space: the leading token goes to
  # the count column, everything after it to the matching *_year column.
  # fill = "right" pads cells without a second piece with NA, which is what
  # the default does as well, just without emitting a warning.
  separate(winners, into = c("Champions", "first_place_year"),
           sep = " ", extra = "merge", fill = "right") %>%
  separate(runners_up, into = c("Runners-up", "second_place_year"),
           sep = " ", extra = "merge", fill = "right") %>%
  separate(third_place, into = c("Third Place", "third_place_year"),
           sep = " ", extra = "merge", fill = "right") %>%
  # Every en dash becomes "0" (in all columns) so that the count columns can
  # be converted to numbers further down.
  mutate_all(list(~str_replace_all(., "–", "0"))) %>%
  # Stack the three count columns into key/value pairs; the team and the
  # *_year columns stay as identifier columns.
  gather(key = "key", value = "value", -team,
         -first_place_year, -second_place_year, -third_place_year) %>%
  mutate(
    # Fix the category order used for sorting and faceting.
    key = forcats::fct_relevel(as.factor(key),
                               "Champions",
                               "Runners-up",
                               "Third Place"),
    value = as.numeric(value),
    # Drop the first run of three capital letters from the team label and
    # trim the surrounding whitespace.
    # NOTE(review): presumably this removes a three-letter country code that
    # comes with the scraped name; verify against the raw table.
    team = team %>% str_replace(., "[A-Z]{3}", "") %>% str_trim(.),
    # Use the English spelling for Brazil; other names are kept as scraped.
    team = case_when(team == "Brasil" ~ "Brazil",
                     TRUE ~ team)
  ) %>%
  arrange(key, desc(value)) %>%
  # as_factor() takes its levels in order of first appearance, so the team
  # levels follow the sorted rows.
  mutate(team = forcats::as_factor(team),
         order = row_number())
## save data
# Write the tidy table to disk and read it back, so that later steps work
# from the saved copy.
clean_rds_path <- here::here("data/copa_campeones_clean.RDS")

# saveRDS() does not create missing directories; make sure the target folder
# exists (no-op, and no warning, when it is already there).
dir.create(dirname(clean_rds_path), showWarnings = FALSE, recursive = TRUE)

saveRDS(copa_campeones_limpia, file = clean_rds_path)
copa_campeones_limpia <- readRDS(file = clean_rds_path)
## PLOT
# Dot plot with one facet per placement category: each team gets a coloured
# point at its count, with the count printed on top of the point.
copa_ganadores_plot <- copa_campeones_limpia %>%
  # fct_rev() reverses the team levels on the y axis.
  ggplot(aes(value, forcats::fct_rev(team), color = key)) +
  geom_point(size = 10) +
  geom_text(aes(label = value),
            size = 5, color = "black",
            family = "Roboto Condensed", fontface = "bold") +
  # One fixed colour per category. The legend is switched off with
  # guide = "none" (guide = FALSE is deprecated in current ggplot2).
  scale_color_manual(values = c("Champions" = "#FFCC33",
                                "Runners-up" = "#999999",
                                "Third Place" = "#CC6600"),
                     guide = "none") +
  scale_x_continuous(breaks = c(1, 5, 10, 15),
                     labels = c(1, 5, 10, 15),
                     limits = c(-1, 16)) +
  # The caption literal is kept flush-left so its text stays exactly as
  # originally written.
  labs(x = "Number of Occurrence", y = NULL,
       title = "Most Successful Teams of the Copa América!",
       subtitle = str_wrap("Ordered by number of Copa América(s) won. Argentina missed the chance to leapfrog Uruguay after consecutive final losses in the previous two tournaments!", width = 80),
       caption = glue("
Source: Wikipedia
By @R_by_Ryo")) +
  facet_wrap(~key) +
  # NOTE(review): theme_copaAmerica() is not defined in the visible part of
  # this script; it has to be defined or sourced before this block runs.
  theme_copaAmerica(caption.size = 10)

# Print the finished plot.
copa_ganadores_plot
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment