Last active
November 30, 2015 15:20
-
-
Save expersso/586c8f2e30765c053596 to your computer and use it in GitHub Desktop.
Fatal dog attacks by category of dog
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attach every package the script relies on.
lapply(
  c("dplyr", "xml2", "rvest", "stringr", "ggplot2"),
  library,
  character.only = TRUE
)

# Download and parse the Wikipedia article.
url <- "https://en.wikipedia.org/wiki/Fatal_dog_attacks_in_the_United_States"
page <- read_html(url)

# Convert every <table> node on the page into a data frame; the article keeps
# one table per year.
tbls <- html_table(xml_find_all(page, "//table"))
# Recover the year of each table from its h3 subheading: take the heading
# text, drop every non-digit character and convert what is left to a number.
years <- as.numeric(
  str_replace_all(
    xml_text(
      xml_find_all(
        page,
        "//h3/span[contains(text(), 'Fatalities reported in ')]"
      )
    ),
    "[^0-9]+",
    ""
  )
)

# The 1985 heading has no table, so it is excluded.
years <- years[years != 1985]
# Tag each table with its year, then stack all tables into one data frame.
# The i-th heading year is assumed to belong to the i-th table on the page
# (TODO confirm that no unrelated table precedes the yearly ones).
if (length(years) > length(tbls)) {
  stop("Found more year headings than tables on the page.", call. = FALSE)
}
for (i in seq_along(years)) {
  tbls[[i]]$year <- years[i]
}

# bind_rows() replaces rbind_all(), which dplyr deprecated and later removed.
df <- bind_rows(tbls)
# Strip parenthesised annotations from the category names: either a space
# followed by "(...)" or a bare "(s)" / "(es)" plural marker.
df$`Category of Dog` <- str_replace_all(
  df$`Category of Dog`,
  " \\(.*\\)|\\(e?s\\)",
  ""
)

# Fold every entry that mentions one of these breeds (case-insensitively) into
# a single "<breed> (incl. mixes)" category. Breeds are applied in order, so an
# entry naming several of them ends up under the first one listed.
for (dog in c(
  "Pit bull", "Rottweiler", "Bulldog", "Husky", "German Shepherd"
)) {
  mentions_dog <- str_detect(
    df$`Category of Dog`,
    regex(dog, ignore_case = TRUE)
  )
  # paste0() rather than paste(): the suffix already starts with a space, so
  # paste()'s default separator would produce a doubled space in the label.
  df$`Category of Dog`[mentions_dog] <- paste0(dog, " (incl. mixes)")
}
# Horizontal bar chart of the number of rows per dog category, restricted to
# categories that occur more than twice and ordered by their count.
df %>%
  count(`Category of Dog`) %>%
  filter(n > 2) %>%
  ggplot(aes(x = reorder(`Category of Dog`, n), y = n)) +
  geom_bar(stat = "identity") +
  labs(
    x = NULL,
    y = "\nNumber of attacks",
    title = "Fatal dog attacks by category of dog\n United States; 1887-2015"
  ) +
  theme_bw() +
  # Start the count axis exactly at zero, with no padding at either end.
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA)) +
  coord_flip()
Author
expersso
commented
Nov 30, 2015
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment