Last active
          November 30, 2015 15:20 
        
      - 
      
- 
        Save expersso/586c8f2e30765c053596 to your computer and use it in GitHub Desktop. 
    Fatal dog attacks by category of dog
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  # Fatal dog attacks in the United States, by category of dog.
#
# Scrapes the per-year tables from the Wikipedia article, tags every row with
# its year, normalises the "Category of Dog" labels, and draws a horizontal
# bar chart of the categories that occur more than twice.

# Attach the required packages. invisible() suppresses the list that lapply()
# would otherwise print at top level.
pkgs <- c("dplyr", "xml2", "rvest", "stringr", "ggplot2")
invisible(lapply(pkgs, library, character.only = TRUE))

url <- "https://en.wikipedia.org/wiki/Fatal_dog_attacks_in_the_United_States"
page <- read_html(url)

# Each year is represented by its own table
tbls <- page %>%
  xml_find_all("//table") %>%
  html_table()

# Extract the year for each table from their respective subheadings:
# keep only the digits of each matching heading and convert them to numbers.
years <- page %>%
  xml_find_all("//h3/span[contains(text(), 'Fatalities reported in ')]") %>%
  xml_text() %>%
  str_replace_all("[^0-9]+", "") %>%
  as.numeric() %>%
  .[. != 1985] # No table for year 1985

# Years are matched to tables by position, so there must be at least as many
# tables as year headings; fail with a readable message otherwise.
if (length(years) > length(tbls)) {
  stop(
    "Found ", length(years), " year headings but only ", length(tbls),
    " tables; the page layout may have changed.",
    call. = FALSE
  )
}

# Add years to tables
for (i in seq_along(years)) {
  tbls[[i]]$year <- years[i]
}

# Bind together all tables.
# bind_rows() replaces rbind_all(), which is no longer part of dplyr.
df <- bind_rows(tbls)

# Remove footnote references and plural endings
df$`Category of Dog` <- df$`Category of Dog` %>%
  str_replace_all(" \\(.*\\)|\\(e?s\\)", "")

# Consolidate categories to include mixes: any label mentioning one of these
# dogs (case-insensitively) is collapsed into a single "<dog> (incl. mixes)".
for (dog in c("Pit bull", "Rottweiler", "Bulldog", "Husky", "German Shepherd")) {
  is_match <- str_detect(df$`Category of Dog`, regex(dog, ignore_case = TRUE))
  # paste0() rather than paste(): paste() would insert its default " "
  # separator in front of the literal's own leading space.
  df$`Category of Dog`[is_match] <- paste0(dog, " (incl. mixes)")
}

# Count attacks per category, keep categories seen more than twice, and plot
# them as horizontal bars ordered by count.
df %>%
  count(`Category of Dog`) %>%
  filter(n > 2) %>%
  ggplot(aes(x = reorder(`Category of Dog`, n), y = n)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA)) +
  theme_bw() +
  labs(
    x = NULL,
    y = "\nNumber of attacks",
    title = "Fatal dog attacks by category of dog\n United States; 1887-2015"
  )
      
      
  Author
  
  
        
      
            expersso
  
      
      
      commented 
        Nov 30, 2015 
      
    
  

  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment