Skip to content

Instantly share code, notes, and snippets.

@zmjones
Last active January 2, 2016 20:29
Show Gist options
  • Save zmjones/8357005 to your computer and use it in GitHub Desktop.
Save zmjones/8357005 to your computer and use it in GitHub Desktop.
reshape the Maddison historical GDP and population data
## Packages this script needs: reshape2 (melt), gdata (read.xls, trim) and
## countrycode (countrycode).
pkgs <- c("reshape2", "gdata", "countrycode")
## library() stops with an error when a package is not installed; require()
## would only return FALSE and let the script fail later on the first call
## to a missing function.
invisible(lapply(pkgs, library, character.only = TRUE))
## Read the Maddison Project workbook, skipping its first two lines.
## check.names = FALSE keeps the raw column headers so they can be cleaned
## explicitly below.
mpd <- read.xls("http://www.ggdc.net/maddison/maddison-project/data/mpd_2013-01.xlsx",
                skip = 2, check.names = FALSE)
## Drop columns that hold nothing but NA. vapply() tests each column as it is
## (apply() would first coerce the whole data frame to a matrix), and
## drop = FALSE guarantees the result is still a data frame.
mpd <- mpd[, !vapply(mpd, function(x) all(is.na(x)), logical(1)),
           drop = FALSE]
names(mpd)[1] <- "year"
## NOTE(review): column 124 is renamed by position, counted after the all-NA
## columns have been removed -- re-check the index if the workbook changes.
names(mpd)[124] <- "Byzantium_Ottoman_Empire_Turkey"
## Remove leading digits, dots, parentheses, "&amp;" and apostrophes, then trim
## surrounding whitespace.
colnames(mpd) <- trim(gsub("^[0-9]+|\\.|\\(|\\)|(&amp;)|'", "", colnames(mpd)))
## A "-" or "/" followed by whitespace becomes a bare "-" ...
colnames(mpd) <- gsub("-\\s+|/\\s+", "-", colnames(mpd))
## ... and every remaining whitespace run, "-" or "/" becomes "_".
colnames(mpd) <- gsub("\\s+|-|/", "_", colnames(mpd))
## identical(colnames(mpd), make.names(colnames(mpd), TRUE)) ## check duplicates
## Make the names syntactically valid and unique.
colnames(mpd) <- make.names(colnames(mpd), unique = TRUE)
## Reshape to long form: one row per (year, country column) pair.
mpd <- melt(mpd, id.vars = "year")
colnames(mpd)[c(2, 3)] <- c("country.name", "gdppc_1990")
mpd$country.name <- as.character(mpd$country.name)
## Cleaned column headers that are replaced, position by position, with the
## spelling in fixed.names before the country-code lookup.
raw.names <- c("Costa_Rica", "Turk_menistan", "Haïti", "UAE",
               "Czecho_slovakia", "F_Czecho_slovakia", "N_Zealand")
fixed.names <- c("costa rica", "turkmenistan", "haiti", "united arab emirates",
                 "czech republic", "czechoslovakia", "new zealand")
pos <- match(mpd$country.name, raw.names)
mpd$country.name[!is.na(pos)] <- fixed.names[pos[!is.na(pos)]]
mpd$ccode <- countrycode(mpd$country.name, "country.name", "cown")
## Serbia is given its code by hand.
mpd$ccode[mpd$country.name %in% "Serbia"] <- 340
## unique(mpd$country.name[is.na(mpd$ccode)]) ## check to see what wasn't matched
## standardize names by mapping the codes back to country names
mpd$country.name <- countrycode(mpd$ccode, "cown", "country.name")
## the hand-assigned code gets a hand-assigned name as well
mpd$country.name[which(mpd$ccode == 340)] <- "SERBIA"
## failed matches are non-independent entities or regions
## dropping failed matches for now
mpd <- mpd[!is.na(mpd$ccode), , drop = FALSE]
write.csv(mpd, file = "mpd.csv", row.names = FALSE)
## Packages this script needs: gdata (read.xls, trim), countrycode
## (countrycode) and reshape2 (melt).
pkgs <- c("gdata", "countrycode", "reshape2")
## library() stops with an error when a package is not installed; require()
## would only return FALSE and let the script fail later on the first call
## to a missing function.
invisible(lapply(pkgs, library, character.only = TRUE))
## Read one sheet of a Maddison "horizontal file" workbook and return it in
## long format with countrycode "cown" codes attached.
##
## Arguments:
##   file   path or URL of the workbook, handed to read.xls()
##   sheet  sheet to read, handed to read.xls()
##   value  name given to the column that holds the sheet's values
##
## Returns a data frame with the columns country.name, year, <value> and
## ccode. Rows whose country name countrycode() cannot match are dropped.
read.mpd <- function(file, sheet, value) {
  ## skip the first two lines of the sheet and discard columns 2 to 11
  df <- read.xls(file, sheet = sheet, skip = 2,
                 fileEncoding = "latin1")[, -c(2:11)]
  ## id.vars is spelled out in full rather than relying on partial matching
  df <- melt(df, id.vars = "X")
  colnames(df) <- c("country.name", "year", value)
  ## strip the leading "X" from the former column names to get the year
  df$year <- as.integer(sub("^X", "", as.character(df$year)))
  ## cells that are not numbers become NA; only that coercion warning is
  ## silenced
  df[[3]] <- suppressWarnings(as.integer(df[[3]]))
  ## Work on a character vector: if the names were read in as a factor, the
  ## "haiti" assignment below would yield NA because it is not an existing
  ## level.
  df$country.name <- trim(as.character(df$country.name))
  df$country.name[df$country.name %in% "Haïti"] <- "haiti"
  df$ccode <- countrycode(df$country.name, "country.name", "cown")
  ## unique(df$country.name[is.na(df$ccode)]) ## which country names aren't matched
  df[!is.na(df$ccode), ]
}
## Read population (sheet 2) and GDP (sheet 3) from the same workbook, join
## the two tables on the columns they share, and save the result.
url <- "http://www.ggdc.net/maddison/Historical_Statistics/horizontal-file_02-2010.xls"
mpd.pop <- read.mpd(file = url, sheet = 2, value = "population")
mpd.gdp <- read.mpd(file = url, sheet = 3, value = "gdp_gk_1990")
## the key columns are exactly the ones both tables have in common
mpd <- merge(x = mpd.gdp, y = mpd.pop,
             by = c("country.name", "year", "ccode"))
write.csv(mpd, file = "mpd-old.csv", row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment