zmjones · January 2, 2016 20:29
diff --git a/maddison-new.R b/maddison-new.R
 ## Reshape the Maddison Project spreadsheet into long format
 ## (year, country.name, gdppc_1990, ccode) and write it to "mpd.csv".
 pkgs <- c("reshape2", "gdata", "countrycode")
 ## library() stops right here when a package is missing; require() would only
 ## return FALSE and let the script die later on an undefined function
 invisible(lapply(pkgs, library, character.only = TRUE))

 mpd <- read.xls("http://www.ggdc.net/maddison/maddison-project/data/mpd_2013-01.xlsx",
                 skip = 2, check.names = FALSE)
 ## drop columns that hold nothing but NA (vapply keeps the column types intact
 ## instead of coercing the whole data frame to a matrix first)
 mpd <- mpd[, !vapply(mpd, function(x) all(is.na(x)), logical(1))]
 names(mpd)[1] <- "year"
 ## NOTE(review): position 124 is hard-coded; confirm it is still the intended
 ## column whenever the spreadsheet layout changes
 names(mpd)[124] <- "Byzantium_Ottoman_Empire_Turkey"
 ## strip leading digits, dots, parentheses and apostrophes from the headers
 ## NOTE(review): the `&amp;` alternative looks like an HTML-escaped `&`;
 ## confirm which form the spreadsheet headers really contain
 colnames(mpd) <- trim(gsub("^[0-9]+|\\.|\\(|\\)|(&amp;)|'", "", colnames(mpd)))
 ## collapse "- " and "/ " into a single "-", then turn whitespace, "-" and "/" into "_"
 colnames(mpd) <- gsub("-\\s+|/\\s+", "-", colnames(mpd))
 colnames(mpd) <- gsub("\\s+|-|/", "_", colnames(mpd))
 ## identical(colnames(mpd), make.names(colnames(mpd), TRUE)) ## check duplicates
 colnames(mpd) <- make.names(colnames(mpd), TRUE)
 ## wide -> long: one row per (year, column) pair
 mpd <- melt(mpd, id.vars = "year")
 names(mpd)[2:3] <- c("country.name", "gdppc_1990")
 mpd$country.name <- as.character(mpd$country.name)
 ## hand-fix the header spellings before the names are passed to countrycode()
 mpd$country.name[mpd$country.name == "Costa_Rica"] <- "costa rica"
 mpd$country.name[mpd$country.name == "Turk_menistan"] <- "turkmenistan"
 mpd$country.name[mpd$country.name == "Haïti"] <- "haiti"
 mpd$country.name[mpd$country.name == "UAE"] <- "united arab emirates"
 mpd$country.name[mpd$country.name == "Czecho_slovakia"] <- "czech republic"
 mpd$country.name[mpd$country.name == "F_Czecho_slovakia"] <- "czechoslovakia"
 mpd$country.name[mpd$country.name == "N_Zealand"] <- "new zealand"
 mpd$ccode <- countrycode(mpd$country.name, "country.name", "cown")
 ## Serbia is assigned code 340 by hand
 mpd$ccode[mpd$country.name == "Serbia"] <- 340
 ## unique(mpd$country.name[is.na(mpd$ccode)]) ## check to see what wasn't matched
 mpd$country.name <- countrycode(mpd$ccode, "cown", "country.name") ## standardize names
 ## %in% never yields NA, so rows with a missing ccode cannot leak into the mask
 mpd$country.name[mpd$ccode %in% 340] <- "SERBIA"
 ## failed matches are non-independent entities or regions
 ## dropping failed matches for now
 mpd <- mpd[!is.na(mpd$ccode), ]
 write.csv(mpd, "mpd.csv", row.names = FALSE)
diff --git a/maddison-old.R b/maddison-old.R
 ## Attach the packages this script calls into (read.xls/trim, countrycode, melt).
 pkgs <- c("gdata", "countrycode", "reshape2")
 ## library() raises an error for a missing package; require() would only
 ## return FALSE and postpone the failure to the first call that needs it
 invisible(lapply(pkgs, library, character.only = TRUE))

 ## Read one sheet of a Maddison "horizontal" workbook and return it in long
 ## (country-year) form.
 ##
 ## Arguments:
 ##   file:  path or URL forwarded to read.xls()
 ##   sheet: sheet forwarded to read.xls()
 ##   value: name given to the melted value column
 ##
 ## Returns a data frame with the columns country.name, year, <value> and
 ## ccode, holding only the rows whose country name countrycode() mapped to a
 ## "cown" code.
 read.mpd <- function(file, sheet, value) {
   ## skip = 2 is forwarded to read.xls(); columns 2-11 are discarded
   df <- read.xls(file, sheet = sheet, skip = 2,
                  fileEncoding = "latin1")[, -c(2:11)]
   ## spell out id.vars rather than rely on partial argument matching
   df <- melt(df, id.vars = "X")
   colnames(df) <- c("country.name", "year", value)
   ## the melted column labels carry a leading "X" that is stripped here
   df$year <- as.integer(gsub("^X", "", as.character(df$year)))
   suppressWarnings(df[, 3] <- as.integer(df[, 3])) ## generates NAs
   ## coerce to character first: if read.xls() handed back a factor, the
   ## "haiti" recode below would assign an invalid level and yield NA
   df$country.name <- trim(as.character(df$country.name), FALSE)
   df$country.name[df$country.name == "Haïti"] <- "haiti"
   df$ccode <- countrycode(df$country.name, "country.name", "cown")
   ## unique(df$country.name[is.na(df$ccode)]) ## which country names aren't matched
   df[!is.na(df$ccode), ]
 }

 ## Read sheets 2 and 3 of the same workbook (stored under the value names
 ## "population" and "gdp_gk_1990"), join them on the columns they share and
 ## save the joined table as "mpd-old.csv".
 url <- "http://www.ggdc.net/maddison/Historical_Statistics/horizontal-file_02-2010.xls"
 mpd.pop <- read.mpd(file = url, sheet = 2, value = "population")
 mpd.gdp <- read.mpd(file = url, sheet = 3, value = "gdp_gk_1990")
 ## no `by` given: merge() joins on every column name common to both tables
 mpd <- merge(x = mpd.gdp, y = mpd.pop)
 write.csv(x = mpd, file = "mpd-old.csv", row.names = FALSE)
	## Reshape the Maddison Project spreadsheet into long format
	## (year, country.name, gdppc_1990, ccode) and write it to "mpd.csv".
	pkgs <- c("reshape2", "gdata", "countrycode")
	## library() stops right here when a package is missing; require() would only
	## return FALSE and let the script die later on an undefined function
	invisible(lapply(pkgs, library, character.only = TRUE))

	mpd <- read.xls("http://www.ggdc.net/maddison/maddison-project/data/mpd_2013-01.xlsx",
	                skip = 2, check.names = FALSE)
	## drop columns that hold nothing but NA (vapply keeps the column types intact
	## instead of coercing the whole data frame to a matrix first)
	mpd <- mpd[, !vapply(mpd, function(x) all(is.na(x)), logical(1))]
	names(mpd)[1] <- "year"
	## NOTE(review): position 124 is hard-coded; confirm it is still the intended
	## column whenever the spreadsheet layout changes
	names(mpd)[124] <- "Byzantium_Ottoman_Empire_Turkey"
	## regex alternation is a bare "|": "\|" is not a valid escape inside an R
	## string literal and stops the file from parsing
	## strip leading digits, dots, parentheses, "&" and apostrophes from the headers
	colnames(mpd) <- trim(gsub("^[0-9]+|\\.|\\(|\\)|(&)|'", "", colnames(mpd)))
	## collapse "- " and "/ " into a single "-", then turn whitespace, "-" and "/" into "_"
	colnames(mpd) <- gsub("-\\s+|/\\s+", "-", colnames(mpd))
	colnames(mpd) <- gsub("\\s+|-|/", "_", colnames(mpd))
	## identical(colnames(mpd), make.names(colnames(mpd), TRUE)) ## check duplicates
	colnames(mpd) <- make.names(colnames(mpd), TRUE)
	## wide -> long: one row per (year, column) pair
	mpd <- melt(mpd, id.vars = "year")
	names(mpd)[2:3] <- c("country.name", "gdppc_1990")
	mpd$country.name <- as.character(mpd$country.name)
	## hand-fix the header spellings before the names are passed to countrycode()
	mpd$country.name[mpd$country.name == "Costa_Rica"] <- "costa rica"
	mpd$country.name[mpd$country.name == "Turk_menistan"] <- "turkmenistan"
	mpd$country.name[mpd$country.name == "Haïti"] <- "haiti"
	mpd$country.name[mpd$country.name == "UAE"] <- "united arab emirates"
	mpd$country.name[mpd$country.name == "Czecho_slovakia"] <- "czech republic"
	mpd$country.name[mpd$country.name == "F_Czecho_slovakia"] <- "czechoslovakia"
	mpd$country.name[mpd$country.name == "N_Zealand"] <- "new zealand"
	mpd$ccode <- countrycode(mpd$country.name, "country.name", "cown")
	## Serbia is assigned code 340 by hand
	mpd$ccode[mpd$country.name == "Serbia"] <- 340
	## unique(mpd$country.name[is.na(mpd$ccode)]) ## check to see what wasn't matched
	mpd$country.name <- countrycode(mpd$ccode, "cown", "country.name") ## standardize names
	## %in% never yields NA, so rows with a missing ccode cannot leak into the mask
	mpd$country.name[mpd$ccode %in% 340] <- "SERBIA"
	## failed matches are non-independent entities or regions
	## dropping failed matches for now
	mpd <- mpd[!is.na(mpd$ccode), ]
	write.csv(mpd, "mpd.csv", row.names = FALSE)
	## Attach the packages this script calls into (read.xls/trim, countrycode, melt).
	pkgs <- c("gdata", "countrycode", "reshape2")
	## library() raises an error for a missing package; require() would only
	## return FALSE and postpone the failure to the first call that needs it
	invisible(lapply(pkgs, library, character.only = TRUE))

	## Read one sheet of a Maddison "horizontal" workbook and return it in long
	## (country-year) form.
	##
	## Arguments:
	##   file:  path or URL forwarded to read.xls()
	##   sheet: sheet forwarded to read.xls()
	##   value: name given to the melted value column
	##
	## Returns a data frame with the columns country.name, year, <value> and
	## ccode, holding only the rows whose country name countrycode() mapped to a
	## "cown" code.
	read.mpd <- function(file, sheet, value) {
	  ## skip = 2 is forwarded to read.xls(); columns 2-11 are discarded
	  df <- read.xls(file, sheet = sheet, skip = 2,
	                 fileEncoding = "latin1")[, -c(2:11)]
	  ## spell out id.vars rather than rely on partial argument matching
	  df <- melt(df, id.vars = "X")
	  colnames(df) <- c("country.name", "year", value)
	  ## the melted column labels carry a leading "X" that is stripped here
	  df$year <- as.integer(gsub("^X", "", as.character(df$year)))
	  suppressWarnings(df[, 3] <- as.integer(df[, 3])) ## generates NAs
	  ## coerce to character first: if read.xls() handed back a factor, the
	  ## "haiti" recode below would assign an invalid level and yield NA
	  df$country.name <- trim(as.character(df$country.name), FALSE)
	  df$country.name[df$country.name == "Haïti"] <- "haiti"
	  df$ccode <- countrycode(df$country.name, "country.name", "cown")
	  ## unique(df$country.name[is.na(df$ccode)]) ## which country names aren't matched
	  df[!is.na(df$ccode), ]
	}

	## Read sheets 2 and 3 of the same workbook (stored under the value names
	## "population" and "gdp_gk_1990"), join them on the columns they share and
	## save the joined table as "mpd-old.csv".
	url <- "http://www.ggdc.net/maddison/Historical_Statistics/horizontal-file_02-2010.xls"
	mpd.pop <- read.mpd(file = url, sheet = 2, value = "population")
	mpd.gdp <- read.mpd(file = url, sheet = 3, value = "gdp_gk_1990")
	## no `by` given: merge() joins on every column name common to both tables
	mpd <- merge(x = mpd.gdp, y = mpd.pop)
	write.csv(x = mpd, file = "mpd-old.csv", row.names = FALSE)