Last active
June 14, 2019 17:02
-
-
Save Ryo-N7/a1bf88bae3b8d7b810d1f19d0157fcbe to your computer and use it in GitHub Desktop.
Most successful teams of the Copa America!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## packages
# Data wrangling
library(dplyr)
library(tidyr)
library(purrr)
library(stringr)
# Web scraping
library(rvest)
library(polite) ## github only (per the original note; check CRAN before installing)
# Plotting and text helpers
library(ggplot2)
library(scales)
library(glue)
library(extrafont)
# Make the fonts imported by extrafont available to the plot below,
# which requests the "Roboto Condensed" family.
loadfonts()
## webscrape
# Copa América statistics annex on the Spanish-language Wikipedia.
url <- "https://es.wikipedia.org/wiki/Anexo:Estad%C3%ADsticas_de_la_Copa_Am%C3%A9rica"

# Open a polite session for the page, then fetch it.
session <- bow(url)

# The table is located purely by its position on the page, so the selector
# stops matching as soon as the article layout changes.
copa_table_nodes <- scrape(session) %>%
  html_nodes(".mw-parser-output > table:nth-child(10)")

# Fail loudly here instead of letting an empty result reach the cleaning step.
if (length(copa_table_nodes) == 0) {
  stop(
    "No table matched '.mw-parser-output > table:nth-child(10)'; ",
    "the page layout may have changed.",
    call. = FALSE
  )
}

# html_table() returns a list of tables; flatten it into a single data frame.
copa_campeones <- copa_table_nodes %>%
  html_table() %>%
  flatten_df()
## clean + tidy
# Turn the scraped table into long format: one row per team and podium
# position ("Champions", "Runners-up", "Third Place"), with the numeric count
# in `value`, the year strings kept alongside, and `order` holding the row
# position after sorting.
copa_campeones_limpia <- copa_campeones %>%
  janitor::clean_names() %>%
  # Keep the first eight rows and the first four columns only.
  # NOTE(review): presumably the rows/columns holding the podium counts --
  # confirm against the live table, since both are selected by position.
  slice(1:8) %>%
  select(1:4) %>%
  set_names(c("team", "winners", "runners_up", "third_place")) %>%
  # Split each cell at the first space: the leading token becomes the count,
  # everything after it (extra = "merge") is kept as the year text.
  # fill = "right" pads single-token cells (e.g. the bare "–" placeholder)
  # with NA without emitting a warning for every such row.
  separate(winners, into = c("Champions", "first_place_year"),
           sep = " ", extra = "merge", fill = "right") %>%
  separate(runners_up, into = c("Runners-up", "second_place_year"),
           sep = " ", extra = "merge", fill = "right") %>%
  separate(third_place, into = c("Third Place", "third_place_year"),
           sep = " ", extra = "merge", fill = "right") %>%
  # An en dash stands in for "none"; turn it into "0" so it parses as a number.
  mutate_all(~ str_replace_all(., "–", "0")) %>%
  # Stack the three count columns into key/value pairs; team and the year
  # columns stay as identifiers.
  gather(key = "key", value = "value",
         -team, -first_place_year, -second_place_year, -third_place_year) %>%
  mutate(
    # Fix the facet order explicitly.
    key = factor(key, levels = c("Champions", "Runners-up", "Third Place")),
    value = as.numeric(value),
    # Drop the first run of three capital letters from the team cell
    # (presumably a country code -- confirm), then trim leftover whitespace.
    team = str_trim(str_replace(team, "[A-Z]{3}", "")),
    # Translate the one Spanish team name that differs in English.
    team = if_else(team == "Brasil", "Brazil", team)
  ) %>%
  arrange(key, desc(value)) %>%
  # as_factor() takes levels in order of first appearance, i.e. teams sorted
  # by number of championships after the arrange() above.
  mutate(team = forcats::as_factor(team),
         order = row_number())
## save data
# Cache the cleaned table on disk, then read it back so the plotting code
# below works from the saved copy.
copa_rds_path <- here::here("data/copa_campeones_clean.RDS")

# saveRDS() does not create missing directories, so make sure data/ exists.
dir.create(dirname(copa_rds_path), showWarnings = FALSE, recursive = TRUE)

saveRDS(copa_campeones_limpia, file = copa_rds_path)
copa_campeones_limpia <- readRDS(file = copa_rds_path)
## PLOT
# Dot plot of podium finishes per team: one facet per position, one point per
# team, with the count printed inside each point.
copa_ganadores_plot <- copa_campeones_limpia %>%
  # fct_rev() flips the factor so the first level is drawn at the top.
  ggplot(aes(value, forcats::fct_rev(team), color = key)) +
  geom_point(size = 10) +
  geom_text(aes(label = value),
            size = 5, color = "black",
            family = "Roboto Condensed", fontface = "bold") +
  # Gold / silver / bronze; the legend is redundant with the facet titles.
  # guide = "none" replaces the deprecated guide = FALSE.
  scale_color_manual(values = c("Champions" = "#FFCC33",
                                "Runners-up" = "#999999",
                                "Third Place" = "#CC6600"),
                     guide = "none") +
  scale_x_continuous(breaks = c(1, 5, 10, 15),
                     labels = c(1, 5, 10, 15),
                     limits = c(-1, 16)) +
  labs(x = "Number of Occurrence", y = NULL,
       title = "Most Successful Teams of the Copa América!",
       subtitle = str_wrap("Ordered by number of Copa América(s) won. Argentina missed the chance to leapfrog Uruguay after consecutive final losses in the previous two tournaments!", width = 80),
       caption = glue("
                      Source: Wikipedia
                      By @R_by_Ryo")) +
  facet_wrap(~key) +
  # NOTE(review): theme_copaAmerica() is not defined in this file; it must be
  # sourced from elsewhere before this section runs.
  theme_copaAmerica(caption.size = 10)

copa_ganadores_plot
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment