Created
March 4, 2016 09:14
-
-
Save expersso/5935396a7e3034774c53 to your computer and use it in GitHub Desktop.
Slope graph for employment by industry
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages attached for the whole script; rvest is only used through an
# explicit `rvest::` prefix, so it is not attached here.
library(xml2)
library(dplyr)
library(tidyr)
library(stringr)
library(ggplot2)

# Download and parse the HTML page that carries the source table.
url <- "http://www.bls.gov/opub/ted/2016/employment-by-industry-1910-and-2015.htm"
page <- read_html(url)

# Select the <table> node by its id, convert the match(es) to data frames,
# and keep the first one. `fill = TRUE` pads rows that have fewer cells than
# the widest row.
df <- rvest::html_table(
  xml_find_all(page, "//table[@id='ted_20160303']"),
  trim = TRUE,
  fill = TRUE
)[[1]]
# Clean every cell: remove runs of two or more whitespace characters, commas,
# and parenthesised single digits such as "(1)" (presumably footnote markers).
# lapply() processes the data frame column by column, so it is never coerced
# to a matrix the way apply() would do, and `replacement` is written in full
# rather than relying on partial matching of `replace`.
df[] <- lapply(df, str_replace_all,
               pattern = "\\s{2,}|,|\\([0-9]\\)", replacement = "")

# Build column names of the form "<year>_<label>": the labels come from the
# first row with its fifth cell dropped, giving four labels to pair with the
# four year prefixes (1910, 1910, 2015, 2015). Everything is lower-cased.
names(df) <- tolower(c(
  "industry",
  paste(rep(c(1910, 2015), each = 2),
        unlist(df[1, -5], use.names = FALSE),
        sep = "_")
))

# Keep rows 2 to 14 only; row 1 was consumed above as the label source.
df <- df[2:14, ]
# Reshape to long form: one row per industry / year / variable.
#   1. Stack every column except `industry` into key/value pairs.
#   2. Split the "<year>_<label>" key at the underscore into `year` and
#      `variable`.
#   3. Convert the values to numeric and shorten one long industry label.
df <- df %>%
  gather(key = variable, value = value, -industry) %>%
  separate(variable, into = c("year", "variable"), sep = "_") %>%
  mutate(
    value = as.numeric(value),
    industry = plyr::revalue(
      industry,
      c("Government not elsewhere classified" = "Government (other)")
    )
  )
# Slope graph of the "percent of total" rows: one point per industry and
# year, joined by a line per industry, with the industry name printed next to
# each point. The legend is dropped because the lines are labelled directly.
ggplot(
  filter(df, variable == "percent of total"),
  aes(x = year, y = value, color = industry, group = industry)
) +
  geom_point() +
  geom_line() +
  # Labels are padded with a space on each side and justified outward.
  geom_text(aes(label = sprintf(" %s ", industry)),
            hjust = "outward", size = 3) +
  # Wide expansion on the discrete x axis leaves horizontal room for labels.
  scale_x_discrete(expand = c(1.5, 1.5)) +
  scale_y_continuous(breaks = seq(from = 0, to = 35, by = 5)) +
  # Two-line title built as a plotmath expression: bold heading stacked over
  # a smaller second line.
  labs(
    x = NULL,
    y = NULL,
    title = bquote(atop(
      bold(.("Employment by industry, 1910 and 2015")),
      scriptstyle(.("Percent of total"))
    ))
  ) +
  theme_light(11) +
  theme(legend.position = "none", panel.grid = element_blank())
Author
expersso
commented
Mar 4, 2016
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment