Sandy4321 · December 17, 2015 16:13
diff --git a/beautiful_time_plot b/beautiful_time_plot
 #http://rpubs.com/bradleyboehmke/median_income_trends
 # Preprocessing & summarizing data
 library(dplyr)
 library(tidyr)
 library(RCurl)
 library(XLConnect)
 library(magrittr)
 # Download the Census h08.xls workbook to a local file, then read the block
 # of worksheet rows that the rest of the script works with.
 appURL <- "https://www.census.gov/hhes/www/income/data/historical/household/2013/h08.xls"
 f <- CFILE("exfile.xls", mode = "wb")
 # NOTE(review): ssl.verifypeer = FALSE switches off TLS certificate checks
 # for this download; kept as in the original -- confirm it is still needed.
 curlPerform(url = appURL, writedata = f@ref, ssl.verifypeer = FALSE)
 close(f)
 out <- readWorksheetFromFile(
   file = "exfile.xls", sheet = "h08", header = FALSE,
   startRow = 62, endRow = 113
 )
 # Column 1 is named State; after it, keep every second column (2, 4, ..., 60)
 # and label those 30 columns X2013 down to X1984.
 names(out)[1] <- "State"
 data <- out[, c(1, seq(2, 61, 2))]
 names(data)[2:31] <- paste0("X", 2013:1984)
 data <- tbl_df(data)

 # Build the national series: keep the "United States" row, reshape the
 # X2013..X1984 columns into long (Year, Income) form, and strip the "X"
 # prefix from the year labels.
 us <- data %>%
   filter(State == "United States") %>%
   gather(Year, Income, X2013:X1984) %>%
   separate(Year, c("left", "Year"), sep = "X") %>%
   select(-left) %>%
   arrange(Year)

 # The 1984 value (column 3 of `us`, i.e. Income) is the baseline; store it
 # on every row of `us`.
 us_base <- us[us$Year == 1984, 3]
 us$us_baseline <- us_base$Income

 # Relative change in U.S. income for each year compared with the 1984
 # baseline.
 us <- mutate(us, us_change = (Income - us_baseline) / us_baseline)
 # Build the per-state series: drop the "United States" row, reshape the
 # X2013..X1984 columns into long (Year, Income) form, strip the "X" prefix
 # from the year labels, and discard rows that have no income value.
 states <- data %>%
   filter(State != "United States") %>%
   gather(Year, Income, X2013:X1984) %>%
   separate(Year, c("left", "Year"), sep = "X") %>%
   select(-left) %>%
   arrange(Year) %>%
   # is.na() is the direct missing-value test; comparing against the string
   # "NA" only dropped these rows through character coercion of the column.
   filter(!is.na(Income))

 # Baseline per state: its 1984 income, renamed to State_Baseline.
 state_base <- states %>%
   filter(Year == 1984) %>%
   select(State, State_Baseline = Income)

 # Attach each state's baseline and compute the relative change in income
 # compared with 1984. The join key is spelled out so the join does not
 # depend on whichever column names the two tables happen to share.
 states <- states %>%
   left_join(state_base, by = "State") %>%
   arrange(State) %>%
   mutate(state_change = (Income - State_Baseline) / State_Baseline)

 # Year was split out of a column label, so it is still character here;
 # convert it to numeric in both tables.
 states$Year <- as.numeric(states$Year)
 us$Year <- as.numeric(us$Year)

 # Rank the states by their 2013 change (largest first) and keep the five
 # highest and the five lowest so they can be marked on the plot.
 rank <- states %>%
   filter(Year == 2013) %>%
   arrange(desc(state_change)) %>%
   mutate(rank = seq_along(State)) %>%
   # n() - 5 rather than a fixed 46: this still selects the last five rows
   # when the number of rows with 2013 data is not exactly 51.
   filter(rank < 6 | rank > n() - 5)

 # Visualization development
 library(grid)
 library(scales)
 library(ggplot2)

 ## Plot It

 # Base plot: one light grey line per state on a stripped-down theme that
 # keeps only dotted horizontal grid lines.
 p <- ggplot(states, aes(Year, state_change, group = State)) +
   geom_line(colour = "grey90", alpha = .9) +
   theme_bw() +
   theme(
     plot.background = element_blank(),
     panel.grid.minor = element_blank(),
     panel.grid.major.x = element_blank(),
     panel.grid.major.y = element_line(linetype = 3, colour = "grey50"),
     panel.border = element_blank(),
     panel.background = element_blank(),
     axis.ticks = element_blank(),
     axis.title = element_blank()
   )

 p

 # Step 2

 # Overlay the overall U.S. series as a dashed line.
 p <- p + geom_line(data = us, mapping = aes(Year, us_change, group = 1),
                    linetype = 5)

 p

 # Step 3

 # Single out the author's own state: filter the states data down to Ohio
 # and draw that series again as a thicker blue line.
 p <- p + geom_line(
   data = filter(states, State == "Ohio"),
   mapping = aes(Year, state_change, group = State),
   colour = "dodgerblue",
   size = 1
 )

 p

 # Step 4

 # Draw boundaries with the biggest economic "winner" and "loser": D.C. and
 # Nevada, the two states the author identified as having the largest growth
 # and the greatest contraction from 1984 to 2013, in a slightly darker grey
 # than the other states.
 # Written as `p <- p + ...` on purpose: `%<>%` binds tighter than binary
 # `+`, so `p %<>% +a + b` stores only layer `a` back into `p`.
 p <- p +
   geom_line(data = filter(states, State == "D.C."),
             aes(Year, state_change, group = State), colour = "grey70") +
   geom_line(data = filter(states, State == "Nevada"),
             aes(Year, state_change, group = State), colour = "grey70")

 p
 # Step 5

 # Mark the top 5 and bottom 5 states (the `rank` table) and the overall
 # U.S. with points at their 2013 values.
 # Written as `p <- p + ...` on purpose: `%<>%` binds tighter than binary
 # `+`, so `p %<>% +a + b` stores only layer `a` back into `p`.
 p <- p +
   geom_point(data = rank, aes(Year, state_change),
              shape = 21, size = 1.5, alpha = .6) +
   geom_point(data = filter(us, Year == 2013), aes(Year, us_change),
              size = 2.5, alpha = .6)

 p

 # Step 6

 # Axis formatting.
 # y-axis: fixed limits and breaks, labels shown in percent format.
 # x-axis: a break every 5 years, reduced padding at the ends, and a lower
 # limit of 1983 although the data starts in 1984, to leave extra space on
 # the left side of the axis.
 # Written as `p <- p + ...` on purpose: `%<>%` binds tighter than binary
 # `+`, so `p %<>% +a + b` stores only `a` back into `p` and the x scale
 # would be missing from the final plot below.
 p <- p +
   scale_y_continuous(limits = c(-.2, .55), breaks = seq(-.2, .4, by = .2),
                      labels = percent) +
   scale_x_continuous(limits = c(1983, 2013), breaks = seq(1985, 2010, by = 5),
                      expand = c(0, .25))

 # Final plot: add the title on top of everything stored in `p`.
 p + ggtitle("Brad's Beautiful Time Series Plot") +
   theme(plot.title = element_text(face = "bold", hjust = .012, vjust = .8,
                                   colour = "#3C3C3C", size = 20))
	#http://rpubs.com/bradleyboehmke/median_income_trends
	# Preprocessing & summarizing data
	library(dplyr)
	library(tidyr)
	library(RCurl)
	library(XLConnect)
	library(magrittr)
	# Download the Census h08.xls workbook to a local file, then read the block
	# of worksheet rows that the rest of the script works with.
	appURL <- "https://www.census.gov/hhes/www/income/data/historical/household/2013/h08.xls"
	f <- CFILE("exfile.xls", mode = "wb")
	# NOTE(review): ssl.verifypeer = FALSE switches off TLS certificate checks
	# for this download; kept as in the original -- confirm it is still needed.
	curlPerform(url = appURL, writedata = f@ref, ssl.verifypeer = FALSE)
	close(f)
	out <- readWorksheetFromFile(
	  file = "exfile.xls", sheet = "h08", header = FALSE,
	  startRow = 62, endRow = 113
	)
	# Column 1 is named State; after it, keep every second column (2, 4, ..., 60)
	# and label those 30 columns X2013 down to X1984.
	names(out)[1] <- "State"
	data <- out[, c(1, seq(2, 61, 2))]
	names(data)[2:31] <- paste0("X", 2013:1984)
	data <- tbl_df(data)

	# Build the national series: keep the "United States" row, reshape the
	# X2013..X1984 columns into long (Year, Income) form, and strip the "X"
	# prefix from the year labels.
	us <- data %>%
	  filter(State == "United States") %>%
	  gather(Year, Income, X2013:X1984) %>%
	  separate(Year, c("left", "Year"), sep = "X") %>%
	  select(-left) %>%
	  arrange(Year)

	# The 1984 value (column 3 of `us`, i.e. Income) is the baseline; store it
	# on every row of `us`.
	us_base <- us[us$Year == 1984, 3]
	us$us_baseline <- us_base$Income

	# Relative change in U.S. income for each year compared with the 1984
	# baseline.
	us <- mutate(us, us_change = (Income - us_baseline) / us_baseline)
	# Build the per-state series: drop the "United States" row, reshape the
	# X2013..X1984 columns into long (Year, Income) form, strip the "X" prefix
	# from the year labels, and discard rows that have no income value.
	states <- data %>%
	  filter(State != "United States") %>%
	  gather(Year, Income, X2013:X1984) %>%
	  separate(Year, c("left", "Year"), sep = "X") %>%
	  select(-left) %>%
	  arrange(Year) %>%
	  # is.na() is the direct missing-value test; comparing against the string
	  # "NA" only dropped these rows through character coercion of the column.
	  filter(!is.na(Income))

	# Baseline per state: its 1984 income, renamed to State_Baseline.
	state_base <- states %>%
	  filter(Year == 1984) %>%
	  select(State, State_Baseline = Income)

	# Attach each state's baseline and compute the relative change in income
	# compared with 1984. The join key is spelled out so the join does not
	# depend on whichever column names the two tables happen to share.
	states <- states %>%
	  left_join(state_base, by = "State") %>%
	  arrange(State) %>%
	  mutate(state_change = (Income - State_Baseline) / State_Baseline)

	# Year was split out of a column label, so it is still character here;
	# convert it to numeric in both tables.
	states$Year <- as.numeric(states$Year)
	us$Year <- as.numeric(us$Year)

	# Rank the states by their 2013 change (largest first) and keep the five
	# highest and the five lowest so they can be marked on the plot.
	rank <- states %>%
	  filter(Year == 2013) %>%
	  arrange(desc(state_change)) %>%
	  mutate(rank = seq_along(State)) %>%
	  # Plain `|` is the element-wise OR (a backslash before it does not parse).
	  # n() - 5 rather than a fixed 46: this still selects the last five rows
	  # when the number of rows with 2013 data is not exactly 51.
	  filter(rank < 6 | rank > n() - 5)

	# Visualization development
	library(grid)
	library(scales)
	library(ggplot2)

	## Plot It

	# Base plot: one light grey line per state on a stripped-down theme that
	# keeps only dotted horizontal grid lines.
	p <- ggplot(states, aes(Year, state_change, group = State)) +
	  geom_line(colour = "grey90", alpha = .9) +
	  theme_bw() +
	  theme(
	    plot.background = element_blank(),
	    panel.grid.minor = element_blank(),
	    panel.grid.major.x = element_blank(),
	    panel.grid.major.y = element_line(linetype = 3, colour = "grey50"),
	    panel.border = element_blank(),
	    panel.background = element_blank(),
	    axis.ticks = element_blank(),
	    axis.title = element_blank()
	  )

	p

	# Step 2

	# Overlay the overall U.S. series as a dashed line.
	p <- p + geom_line(data = us, mapping = aes(Year, us_change, group = 1),
	                   linetype = 5)

	p

	# Step 3

	# Single out the author's own state: filter the states data down to Ohio
	# and draw that series again as a thicker blue line.
	p <- p + geom_line(
	  data = filter(states, State == "Ohio"),
	  mapping = aes(Year, state_change, group = State),
	  colour = "dodgerblue",
	  size = 1
	)

	p

	# Step 4

	# Draw boundaries with the biggest economic "winner" and "loser": D.C. and
	# Nevada, the two states the author identified as having the largest growth
	# and the greatest contraction from 1984 to 2013, in a slightly darker grey
	# than the other states.
	# Written as `p <- p + ...` on purpose: `%<>%` binds tighter than binary
	# `+`, so `p %<>% +a + b` stores only layer `a` back into `p`.
	p <- p +
	  geom_line(data = filter(states, State == "D.C."),
	            aes(Year, state_change, group = State), colour = "grey70") +
	  geom_line(data = filter(states, State == "Nevada"),
	            aes(Year, state_change, group = State), colour = "grey70")

	p
	# Step 5

	# Mark the top 5 and bottom 5 states (the `rank` table) and the overall
	# U.S. with points at their 2013 values.
	# Written as `p <- p + ...` on purpose: `%<>%` binds tighter than binary
	# `+`, so `p %<>% +a + b` stores only layer `a` back into `p`.
	p <- p +
	  geom_point(data = rank, aes(Year, state_change),
	             shape = 21, size = 1.5, alpha = .6) +
	  geom_point(data = filter(us, Year == 2013), aes(Year, us_change),
	             size = 2.5, alpha = .6)

	p

	# Step 6

	# Axis formatting.
	# y-axis: fixed limits and breaks, labels shown in percent format.
	# x-axis: a break every 5 years, reduced padding at the ends, and a lower
	# limit of 1983 although the data starts in 1984, to leave extra space on
	# the left side of the axis.
	# Written as `p <- p + ...` on purpose: `%<>%` binds tighter than binary
	# `+`, so `p %<>% +a + b` stores only `a` back into `p` and the x scale
	# would be missing from the final plot below.
	p <- p +
	  scale_y_continuous(limits = c(-.2, .55), breaks = seq(-.2, .4, by = .2),
	                     labels = percent) +
	  scale_x_continuous(limits = c(1983, 2013), breaks = seq(1985, 2010, by = 5),
	                     expand = c(0, .25))

	# Final plot: add the title on top of everything stored in `p`.
	p + ggtitle("Brad's Beautiful Time Series Plot") +
	  theme(plot.title = element_text(face = "bold", hjust = .012, vjust = .8,
	                                  colour = "#3C3C3C", size = 20))