Skip to content

Instantly share code, notes, and snippets.

@briatte
Created October 16, 2014 05:22
Show Gist options
  • Select an option

  • Save briatte/ae2d920ed041e38a9a38 to your computer and use it in GitHub Desktop.

Select an option

Save briatte/ae2d920ed041e38a9a38 to your computer and use it in GitHub Desktop.
Piketty and Saez 2003, enhanced fig. 1
## Top incomes data by Piketty and Saez, Sep 2013 update.
## 2014-10-16
## source
# T. Piketty and E. Saez, "Income Inequality in the United States, 1913-1998"
# Quarterly Journal of Economics, 118(1), 2003, 1-39 [ fig. 1, enhanced ]
## packages
library(ggplot2)
library(reshape2)
library(scales)
library(xlsx)
## dataset
# Local CSV cache of the table; it is only (re)built when missing.
file <- "incomes.csv"
if (!file.exists(file)) {
  # Download the workbook to a temporary .xls file instead of to `file`:
  # if the download or the parse fails, no binary spreadsheet is left behind
  # under the CSV name, where the next run would mistake it for a valid cache.
  xls <- tempfile(fileext = ".xls")
  status <- download.file("http://emlab.berkeley.edu/~saez/TabFig2012prel.xls",
                          xls, mode = "wb")
  if (status != 0) {
    stop("download of TabFig2012prel.xls failed (status ", status, ")",
         call. = FALSE)
  }
  # read XLS: sheet "Table A1", rows 4-105, first seven columns
  ps <- read.xlsx(xls, sheetName = "Table A1",
                  startRow = 4, endRow = 105, colIndex = 1:7)
  unlink(xls)
  # drop first row
  ps <- ps[-1, ]
  # write CSV
  write.csv(ps, file, row.names = FALSE)
}
# read CSV (always read back from the cache, whether or not it was just built)
ps <- read.csv(file, stringsAsFactors = FALSE)
# result
str(ps)
## variables
# names: the year column, then one column per top-income fractile
names(ps) <- c("Year", paste0("top ", c(10, 5, 1, 0.5, 0.1, 0.01), "%"))
# reshape to long format: one row per Year/Fractile pair, measure in `value`
# (argument names are spelled out in full rather than relying on R's
# partial argument matching of `id` and `variable`)
ps <- melt(ps, id.vars = "Year", variable.name = "Fractile")
# subset: drop rows containing missing values
ps <- na.omit(ps)
# result
head(ps)
tail(ps)
## plot
# One line per fractile, shares rescaled from percentage points to proportions
# so that the percent formatter labels the y axis correctly.
# Built with ggplot() because qplot() is deprecated since ggplot2 3.4.0.
ggplot(ps, aes(x = Year, y = value / 100)) +
  geom_line(aes(linetype = Fractile)) +
  labs(y = NULL, x = NULL, title = "U.S. Top Income Shares of National Revenue, 1917-2012\n") +
  # label each series just right of its 2012 value, in place of a legend
  geom_text(data = subset(ps, Year == 2012), aes(x = 2014, label = Fractile), hjust = 0) +
  # x limits extend past the last year so the text labels fit in the panel
  scale_x_continuous(limits = c(1911, 2031), breaks = seq(1910, 2010, by = 20)) +
  scale_y_continuous(labels = percent) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "none")
# uncomment to save
# ggsave("piketty_saez.png", width = 11, height = 6.375)
## have a nice day
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment