ulfelder · April 20, 2016 14:54 · ulfelder · Jun 7, 2013 · ramnathv · Jun 7, 2013
diff --git a/freedomintheworld.converter b/freedomintheworld.converter
 # This script converts Freedom House's annual Freedom in the World data from
 # the ugly, table-like Excel format in which it's posted into a data frame in R.
 # It is set up to work on future updates, too, and should automatically adjust
 # as years or countries are added.
 #
 # Before running this script: 
 #
 # 1. Open a browser & go to http://www.freedomhouse.org/report-types/freedom-world
 #
 # 2. Download the file "Country ratings and status, FIW 1973-2013(EXCEL)"
 # 
 # 3. Open that file.
 #
 # 4. Under the File menu, click on "Save As."
 # 
 # 5. Select your R working directory as the destination, name the file "fiw2013",
 #    and set "Save as type" to "Text (Tab delimited)".  Click "Yes" in response
 #    to the warning that follows.
 #
 # Then run the following in R.

 library(reshape)

 fiw <- read.delim("fiw2013.txt", header = FALSE)  # Assumes file is in working directory
 fiw2 <- fiw[9:dim(fiw)[1],]  # Lops off junk rows at the top
 row.names(fiw2) <- NULL   # Removes bogus variable names
 today <- Sys.Date()
 thisyear <- as.numeric(substr(today,1,4))
 ncountries <- nrow(fiw2[nchar(as.character(fiw2$V1)) > 1,])  # Counts countries
 nyears <- thisyear - 1972 - 1  # Counts years, adjusts for no data for 1981
 fiw3 <- fiw2[1:ncountries,1:((3 * nyears) + 1)]  # Trims the rest of the fat
 fiwvars <- c("name", rep(c("civlibs", "polrights", "status"),
             times = thisyear - 1972 - 1) )  # Again, -1 bc no data for 1981
 names(fiw3) <- fiwvars
 fiw4 <- t(fiw3)
 countries <- fiw4[1,]
 fiw5 <- fiw4[-1,]
 fiw6 <- melt(fiw5)
 fiwyears <- c(rep(1972:1980, each = 3), rep(1982:(thisyear - 1), each = 3))
 name <- rep(countries, each = length(fiwyears))
 year <- rep(fiwyears, times = length(countries))
 fiw7 <- cbind(name, year, fiw6)
 fiw7[,4] <- NULL
 names(fiw7) <- c("name", "year", "var", "value")

 civlibs <- subset(fiw7, var=="civlibs")
 civlibs[,3] <- NULL
 names(civlibs) <- c("name", "year", "civlibs")
 polrights <- subset(fiw7, var=="polrights")
 polrights[,3] <- NULL
 names(polrights) <- c("name", "year", "polrights")
 status <- subset(fiw7, var=="status")
 status[,3] <- NULL
 names(status) <- c("name", "year", "status")
 fiw8 <- merge(civlibs, polrights)
 fiw9 <- merge(fiw8, status)

 fiw9$name <- as.character(fiw9$name)
 fiw9$civlibs <- as.numeric(as.character(fiw9$civlibs))
 fiw9$polrights <- as.numeric(as.character(fiw9$polrights))
 fiw9$status <- as.character(fiw9$status)
 fiw10 <- subset(fiw9, is.na(civlibs)==FALSE)
 fiw <- fiw10

 rm(fiw2,fiw3,fiw4,fiw5,fiw6,fiw7,fiw8,fiw9,fiw10)

 # After running this script, you'll probably want to add ID codes
 # for countries to facilitate merging with other TSCS data. The
 # 'countrycode' package is useful here. For example, to get 
 # Correlates of War numeric codes, you could run:

 library(countrycode)
 fiw$ccode <- countrycode(fiw$name, "country.name", "cown", warn = TRUE)

 # Note that this process usually requires some massaging to get
 # matches for all cases. One way to do this is to open countrycode_data,
 # find the names it's using for the cases that don't match, replace
 # the relevant country names in 'fiw' with the ones countrycode wants,
 # and iterate this until all names match properly.
	# This script converts Freedom House's annual Freedom in the World data from
	# the ugly, table-like Excel format in which it's posted into a data frame in R.
	# It is set up to work on future updates, too, and should automatically adjust
	# as years or countries are added.
	#
	# Before running this script:
	#
	# 1. Open a browser & go to http://www.freedomhouse.org/report-types/freedom-world
	#
	# 2. Download the file "Country ratings and status, FIW 1973-2013(EXCEL)"
	#
	# 3. Open that file.
	#
	# 4. Under the File menu, click on "Save As."
	#
	# 5. Select your R working directory as the destination, name the file "fiw2013",
	# and set "Save as type" to "Text (Tab delimited)". Click "Yes" in response
	# to the warning that follows.
	#
	# Then run the following in R.

	library(reshape)

	fiw <- read.delim("fiw2013.txt", header = FALSE) # Assumes file is in working directory
	fiw2 <- fiw[9:dim(fiw)[1],] # Lops off junk rows at the top
	row.names(fiw2) <- NULL # Removes bogus variable names
	today <- Sys.Date()
	thisyear <- as.numeric(substr(today,1,4))
	ncountries <- nrow(fiw2[nchar(as.character(fiw2$V1)) > 1,]) # Counts countries
	nyears <- thisyear - 1972 - 1 # Counts years, adjusts for no data for 1981
	fiw3 <- fiw2[1:ncountries,1:((3 * nyears) + 1)] # Trims the rest of the fat
	fiwvars <- c("name", rep(c("civlibs", "polrights", "status"),
	times = thisyear - 1972 - 1) ) # Again, -1 bc no data for 1981
	names(fiw3) <- fiwvars
	fiw4 <- t(fiw3)
	countries <- fiw4[1,]
	fiw5 <- fiw4[-1,]
	fiw6 <- melt(fiw5)
	fiwyears <- c(rep(1972:1980, each = 3), rep(1982:(thisyear - 1), each = 3))
	name <- rep(countries, each = length(fiwyears))
	year <- rep(fiwyears, times = length(countries))
	fiw7 <- cbind(name, year, fiw6)
	fiw7[,4] <- NULL
	names(fiw7) <- c("name", "year", "var", "value")

	civlibs <- subset(fiw7, var=="civlibs")
	civlibs[,3] <- NULL
	names(civlibs) <- c("name", "year", "civlibs")
	polrights <- subset(fiw7, var=="polrights")
	polrights[,3] <- NULL
	names(polrights) <- c("name", "year", "polrights")
	status <- subset(fiw7, var=="status")
	status[,3] <- NULL
	names(status) <- c("name", "year", "status")
	fiw8 <- merge(civlibs, polrights)
	fiw9 <- merge(fiw8, status)

	fiw9$name <- as.character(fiw9$name)
	fiw9$civlibs <- as.numeric(as.character(fiw9$civlibs))
	fiw9$polrights <- as.numeric(as.character(fiw9$polrights))
	fiw9$status <- as.character(fiw9$status)
	fiw10 <- subset(fiw9, is.na(civlibs)==FALSE)
	fiw <- fiw10

	rm(fiw2,fiw3,fiw4,fiw5,fiw6,fiw7,fiw8,fiw9,fiw10)

	# After running this script, you'll probably want to add ID codes
	# for countries to facilitate merging with other TSCS data. The
	# 'countrycode' package is useful here. For example, to get
	# Correlates of War numeric codes, you could run:

	library(countrycode)
	fiw$ccode <- countrycode(fiw$name, "country.name", "cown", warn = TRUE)

	# Note that this process usually requires some massaging to get
	# matches for all cases. One way to do this is to open countrycode_data,
	# find the names it's using for the cases that don't match, replace
	# the relevant country names in 'fiw' with the ones countrycode wants,
	# and iterate this until all names match properly.