Skip to content

Instantly share code, notes, and snippets.

@briatte
Last active December 20, 2015 12:39
Show Gist options
  • Save briatte/6132935 to your computer and use it in GitHub Desktop.
Save briatte/6132935 to your computer and use it in GitHub Desktop.
extract variable labels from QOG data, save as R data frames

This Gist is used to create the variable indexes in this package:

  1. Stata do-file to extract the variable labels
  2. R script to convert the files to R data frames.
  3. R script to create the demo datasets.

The scripts all worked in early August 2013.

Exceptions corrected by the do-file:

  • [non-ASCII character lost in extraction] to ' (\226)
  • ...
//
//
//
// QOG Social Policy (both cs and ts, with fixes)
// qogsoc_fixes: re-applies the variable labels of the QOG Social Policy data
// that need an encoding fix. Run it right after loading a Social Policy file.
// None of the -label variable- calls is captured, so the program stops with
// an error if one of these variables is absent from the data in memory.
capture program drop qogsoc_fixes
program define qogsoc_fixes
    // encoding fixes
    label variable ar_ue "Unemployment rate as % of civilian labour force, 1995-2002 (OECD, Kolodko 2000)."
    label variable dpi_dhondt "D'Hondt"
    label variable hu_ecr "Employers' contributions revenue"
    label variable hu_facr "State funds and other authorities' contributions revenue"
    label variable hu_wcr "Workers' contributions revenue"
    label variable iaep_lrit "Legislature's Ratification of International Treaties"
    label variable wvs_e040 "Hard work doesn't bring success"
    // progress message
    display as text "fixes done."
end
// Export the variable index of the QOG Social Policy time-series file.
// Output: one semicolon-separated row per variable holding its name, its
// label, the smallest and largest value of -year- among observations where
// the variable is non-missing, the count of those observations, and the
// length of the year span (largest - smallest + 1).
capture file close fh   // release the handle if an earlier run left it open
use "~/Documents/Research/Data/QOG/QOG Social Policy/qog-soc-ts-long-v4apr12.dta", clear
qogsoc_fixes
file open fh using "~/Documents/Code/R/qogdata/qog.soc.ts.labels.txt", write replace
// header row
file write fh "variable;label;ts.min;ts.max;ts.N;ts.T"
foreach var of varlist * {
    local lbl: variable label `var'
    // summarize -year- over the rows where the current variable is observed
    quietly summarize year if !missing(`var')
    local n_obs = r(N)
    local yr_first = r(min)
    local yr_last = r(max)
    local span = `yr_last' - `yr_first' + 1
    file write fh _newline "`var';`lbl';`yr_first';`yr_last';`n_obs';`span'"
}
file close fh
// Export the variable index of the QOG Social Policy cross-section file.
// Output: one semicolon-separated row per variable holding its name, its
// label and r(N) from -summarize- of that variable.
capture file close fh   // release the handle if an earlier run left it open
use "~/Documents/Research/Data/QOG/QOG Social Policy/qog-soc-cs-v4apr12.dta", clear
qogsoc_fixes
file open fh using "~/Documents/Code/R/qogdata/qog.soc.cs.labels.txt", write replace
// header row
file write fh "variable;label;cs.N"
foreach var of varlist * {
    local lbl: variable label `var'
    quietly summarize `var' if !missing(`var')
    local n_obs = r(N)
    file write fh _newline "`var';`lbl';`n_obs'"
}
file close fh
//
//
//
// QOG Standard (missing cs/no-ts labels)
// qogstd_fixes: re-applies variable labels of the QOG Standard data.
// The same program is run on both the time-series and the cross-section
// file, so labels for variables that exist in only one of them are wrapped
// in -capture-; the uncaptured calls stop the program with an error if
// their variable is absent from the data in memory.
capture program drop qogstd_fixes
program define qogstd_fixes
    // encoding fixes
    label variable gd_ptsa "Political Terror Scale - Amnesty International"
    label variable gd_ptss "Political Terror Scale - US State Department"
    // ts-only
    capture label variable iaep_basp "Banning of 'Anti-System' Parties"
    capture label variable iaep_lrit "Legislature's Ratification of International Treaties"
    capture label variable jw_mmdballot "Party Control over Ballot - MMD(lower/only house)"
    capture label variable jw_mmdballot2 "Party Control over Ballot - MMD(upper house)"
    capture label variable jw_mmdpool "Sharing of Votes among Candidates - MMD(lower/only house)"
    capture label variable jw_mmdpool2 "Sharing of Votes among Candidates - MMD(upper house)"
    capture label variable jw_mmdvote "Candidate or Party-specific Voting - MMD(lower/only house)"
    capture label variable jw_mmdvote2 "Candidate or Party-specific Voting - MMD(upper house)"
    capture label variable jw_smdballot "Party Control over Ballot - SMD(lower/only house)"
    capture label variable jw_smdballot2 "Party Control over Ballot - SMD(upper house)"
    capture label variable jw_smdpool "Sharing of Votes among Candidates - SMD(lower/only house)"
    capture label variable jw_smdpool2 "Sharing of Votes among Candidates - SMD(upper house)"
    capture label variable jw_smdvote "Candidate or Party-specific Voting - SMD(lower/only house)"
    capture label variable jw_smdvote2 "Candidate or Party-specific Voting - SMD(upper house)"
    // cs-only
    capture label variable qs_closed_cih "Closed Public Administration - Confidence Interval (High)"
    capture label variable qs_closed_cil "Closed Public Administration - Confidence Interval (Low)"
    capture label variable qs_impar_cih "Impartial Public Administration - Confidence Interval (High)"
    capture label variable qs_impar_cil "Impartial Public Administration - Confidence Interval (Low)"
    capture label variable qs_proff_cih "Professional Public Administration - Confidence Interval (High)"
    capture label variable qs_proff_cil "Professional Public Administration - Confidence Interval (Low)"
    // not captured: these three must exist in whichever file is loaded
    label variable ti_cpi_min "Corruption Perceptions Index - Min Range"
    label variable ti_cpi_max "Corruption Perceptions Index - Max Range"
    label variable ti_cpi_sd "Corruption Perceptions Index - Standard Deviation"
    // progress message
    display as text "fixes done."
end
// Export the variable index of the QOG Standard time-series file.
// Output: one semicolon-separated row per variable holding its name, its
// label, the smallest and largest value of -year- among observations where
// the variable is non-missing, the count of those observations, and the
// length of the year span (largest - smallest + 1).
capture file close fh   // release the handle if an earlier run left it open
use "~/Documents/Research/Data/QOG/QOG Standard 2013/QoG_std_ts_15May13.dta", clear
qogstd_fixes
file open fh using "~/Documents/Code/R/qogdata/qog.std.ts.labels.txt", write replace
// header row
file write fh "variable;label;ts.min;ts.max;ts.N;ts.T"
foreach var of varlist * {
    local lbl: variable label `var'
    // summarize -year- over the rows where the current variable is observed
    quietly summarize year if !missing(`var')
    local n_obs = r(N)
    local yr_first = r(min)
    local yr_last = r(max)
    local span = `yr_last' - `yr_first' + 1
    file write fh _newline "`var';`lbl';`yr_first';`yr_last';`n_obs';`span'"
}
file close fh
// Export the variable index of the QOG Standard cross-section file.
// Output: one semicolon-separated row per variable holding its name, its
// label and r(N) from -summarize- of that variable.
cap file close fh   // release the handle if an earlier run left it open
use "~/Documents/Research/Data/QOG/QOG Standard 2013/QoG_std_cs_15May13.dta", clear
qogstd_fixes
file open fh using "~/Documents/Code/R/qogdata/qog.std.cs.labels.txt", write replace
// header row
file write fh "variable;label;cs.N"
foreach v of varlist * {
    local sLabel: variable label `v'
    qui su `v' if !mi(`v')
    local N = r(N)
    // cap necessary due to encoding issue on one line: the write stays
    // captured so the export keeps going, but the failure is now reported
    // instead of the row disappearing without a trace
    cap file write fh _n "`v';`sLabel';`N'"
    local rc = _rc
    if `rc' {
        di as err "note: no row written for `v' (file write failed, rc = `rc')"
    }
}
file close fh
//
//
//
// QOG Basic (missing cs/no-ts labels)
// Export the variable index of the QOG Basic time-series file.
// Output: one semicolon-separated row per variable holding its name, its
// label, the smallest and largest value of -year- among observations where
// the variable is non-missing, the count of those observations, and the
// length of the year span (largest - smallest + 1).
capture file close fh   // release the handle if an earlier run left it open
use "~/Documents/Research/Data/QOG/QOG Basic/qog-basic-2012-ts.dta", clear
* TODO: INSERT FIXES (no label-fix program is defined for QOG Basic in this file)
file open fh using "~/Documents/Code/R/qogdata/qog.bas.ts.labels.txt", write replace
// header row
file write fh "variable;label;ts.min;ts.max;ts.N;ts.T"
foreach var of varlist * {
    local lbl: variable label `var'
    // summarize -year- over the rows where the current variable is observed
    quietly summarize year if !missing(`var')
    local n_obs = r(N)
    local yr_first = r(min)
    local yr_last = r(max)
    local span = `yr_last' - `yr_first' + 1
    file write fh _newline "`var';`lbl';`yr_first';`yr_last';`n_obs';`span'"
}
file close fh
// Export the variable index of the QOG Basic cross-section file.
// Output: one semicolon-separated row per variable holding its name, its
// label and r(N) from -summarize- of that variable.
capture file close fh   // release the handle if an earlier run left it open
use "~/Documents/Research/Data/QOG/QOG Basic/qog-basic-2012-cs.dta", clear
* TODO: INSERT FIXES (no label-fix program is defined for QOG Basic in this file)
file open fh using "~/Documents/Code/R/qogdata/qog.bas.cs.labels.txt", write replace
// header row
file write fh "variable;label;cs.N"
foreach var of varlist * {
    local lbl: variable label `var'
    quietly summarize `var' if !missing(`var')
    local n_obs = r(N)
    file write fh _newline "`var';`lbl';`n_obs'"
}
file close fh
// done
#
#
# QOG Social Policy
#
# Build the QOG Social Policy variable index from the two semicolon-separated
# label files (time series and cross-section) and save it as
# data/qog.soc.index.rda.

# Empty fields are read as NA; FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned, unlike the reserved word FALSE.
QOG.soc.ts.labels <- read.csv("qog.soc.ts.labels.txt", sep = ";",
                              na.strings = c("NA", ""),
                              stringsAsFactors = FALSE)
QOG.soc.cs.labels <- read.csv("qog.soc.cs.labels.txt", sep = ";",
                              na.strings = c("NA", ""),
                              stringsAsFactors = FALSE)

# Transliterate the labels to plain ASCII.
QOG.soc.ts.labels$label <- iconv(QOG.soc.ts.labels$label, to = "ASCII//TRANSLIT")
QOG.soc.cs.labels$label <- iconv(QOG.soc.cs.labels$label, to = "ASCII//TRANSLIT")

# Full outer join on the variable name: label.x is the time-series label,
# label.y the cross-section label.
QOG.soc.labels <- merge(QOG.soc.ts.labels, QOG.soc.cs.labels,
                        by = "variable", all = TRUE)

# Keep variables that have a count in at least one of the two files.
QOG.soc.labels <- subset(QOG.soc.labels, !is.na(cs.N) | !is.na(ts.N))

# check: TRUE (one row per variable)
nrow(QOG.soc.labels) == length(unique(QOG.soc.labels$variable))

# check: empty (no variable lacks a label in both files)
QOG.soc.labels[is.na(QOG.soc.labels$label.x) & is.na(QOG.soc.labels$label.y), ]

# Prefer the cross-section label; fall back to the time-series label.
QOG.soc.labels$label <- ifelse(is.na(QOG.soc.labels$label.y),
                               QOG.soc.labels$label.x,
                               QOG.soc.labels$label.y)

# check: FALSE (no label is missing after the fallback)
table(is.na(QOG.soc.labels$label))

# Final column order of the index.
vars <- c("variable", "label", "cs.N", "ts.N", "ts.min", "ts.max", "ts.T")
qog.soc.index <- QOG.soc.labels[, vars]
save(qog.soc.index, file = "data/qog.soc.index.rda")
#
#
# QOG Standard
#
# Build the QOG Standard variable index from the two semicolon-separated
# label files (time series and cross-section), add the cs.min / cs.max
# columns computed from the "csyom" data, and save the result as
# data/qog.std.index.rda.

# Empty fields are read as NA; FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned, unlike the reserved word FALSE.
QOG.std.ts.labels <- read.csv("qog.std.ts.labels.txt", sep = ";",
                              na.strings = c("NA", ""),
                              stringsAsFactors = FALSE)
QOG.std.cs.labels <- read.csv("qog.std.cs.labels.txt", sep = ";",
                              na.strings = c("NA", ""),
                              stringsAsFactors = FALSE)

# Transliterate the labels to plain ASCII.
QOG.std.ts.labels$label <- iconv(QOG.std.ts.labels$label, to = "ASCII//TRANSLIT")
QOG.std.cs.labels$label <- iconv(QOG.std.cs.labels$label, to = "ASCII//TRANSLIT")

# Full outer join on the variable name: label.x is the time-series label,
# label.y the cross-section label.
QOG.std.labels <- merge(QOG.std.ts.labels, QOG.std.cs.labels,
                        by = "variable", all = TRUE)

# Keep variables that have a count in at least one of the two files.
QOG.std.labels <- subset(QOG.std.labels, !is.na(cs.N) | !is.na(ts.N))

# check: TRUE (one row per variable)
nrow(QOG.std.labels) == length(unique(QOG.std.labels$variable))

# check: empty (no variable lacks a label in both files)
QOG.std.labels[is.na(QOG.std.labels$label.x) & is.na(QOG.std.labels$label.y), ]

# Prefer the cross-section label; fall back to the time-series label.
QOG.std.labels$label <- ifelse(is.na(QOG.std.labels$label.y),
                               QOG.std.labels$label.x,
                               QOG.std.labels$label.y)

# check: FALSE (no label is missing after the fallback)
table(is.na(QOG.std.labels$label))

vars <- c("variable", "label", "cs.N", "ts.N", "ts.min", "ts.max", "ts.T")
qog.std.index <- QOG.std.labels[, vars]

# NOTE(review): qogdata() is not defined or loaded anywhere in this script;
# presumably it comes from the qogdata package -- confirm it is attached
# before running.
QOG.csyom <- qogdata(tempfile(fileext = ".csv"), format = "csyom")

# Smallest and largest non-missing numeric value of every csyom column.
csyom <- data.frame(
  cs.min = sapply(names(QOG.csyom),
                  function(x) min(as.numeric(QOG.csyom[, x]), na.rm = TRUE)),
  cs.max = sapply(names(QOG.csyom),
                  function(x) max(as.numeric(QOG.csyom[, x]), na.rm = TRUE))
)

# Blank the range of the first six columns
# (presumably identifier columns -- TODO confirm).
csyom$cs.min[1:6] <- NA
csyom$cs.max[1:6] <- NA

# Strip the "_yom" marker so the names match the index variable names.
csyom$variable <- gsub("_yom", "", rownames(csyom))

qog.std.index <- merge(qog.std.index, csyom,
                       by = "variable", all = TRUE, sort = FALSE)

# Final column order of the index.
vars <- c("variable", "label", "cs.N", "cs.min", "cs.max",
          "ts.N", "ts.min", "ts.max", "ts.T")
qog.std.index <- qog.std.index[, vars]
save(qog.std.index, file = "data/qog.std.index.rda")
#
#
# QOG Basic
#
# Build the QOG Basic variable index from the two semicolon-separated label
# files (time series and cross-section) and save it as
# data/qog.bas.index.rda.

# Empty fields are read as NA; FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned, unlike the reserved word FALSE.
QOG.bas.ts.labels <- read.csv("qog.bas.ts.labels.txt", sep = ";",
                              na.strings = c("NA", ""),
                              stringsAsFactors = FALSE)
QOG.bas.cs.labels <- read.csv("qog.bas.cs.labels.txt", sep = ";",
                              na.strings = c("NA", ""),
                              stringsAsFactors = FALSE)

# Transliterate the labels to plain ASCII.
QOG.bas.ts.labels$label <- iconv(QOG.bas.ts.labels$label, to = "ASCII//TRANSLIT")
QOG.bas.cs.labels$label <- iconv(QOG.bas.cs.labels$label, to = "ASCII//TRANSLIT")

# Full outer join on the variable name: label.x is the time-series label,
# label.y the cross-section label.
QOG.bas.labels <- merge(QOG.bas.ts.labels, QOG.bas.cs.labels,
                        by = "variable", all = TRUE)

# Keep variables that have a count in at least one of the two files.
QOG.bas.labels <- subset(QOG.bas.labels, !is.na(cs.N) | !is.na(ts.N))

# check: TRUE (one row per variable)
nrow(QOG.bas.labels) == length(unique(QOG.bas.labels$variable))

# check: empty (no variable lacks a label in both files)
QOG.bas.labels[is.na(QOG.bas.labels$label.x) & is.na(QOG.bas.labels$label.y), ]

# Prefer the cross-section label; fall back to the time-series label.
QOG.bas.labels$label <- ifelse(is.na(QOG.bas.labels$label.y),
                               QOG.bas.labels$label.x,
                               QOG.bas.labels$label.y)

# check: FALSE (no label is missing after the fallback)
table(is.na(QOG.bas.labels$label))

# Final column order of the index.
vars <- c("variable", "label", "cs.N", "ts.N", "ts.min", "ts.max", "ts.T")
qog.bas.index <- QOG.bas.labels[, vars]
save(qog.bas.index, file = "data/qog.bas.index.rda")
# sic transit data mundi
# Create the two demo datasets (time series and cross-section) with the same
# set of variables, and save both into data/qog.demo.rda.
# NOTE(review): the package attached here is xtdata while the function used
# below is qogdata() -- confirm the package name.
require(xtdata)

# Time-series demo data.
qog.ts.demo <- qogdata(
  file = "qog.ts.dta",
  format = "ts",
  variables = c(
    "year", "ccode", "ccodealp", "cname", "unna_pop",
    "wdi_fr", "wdi_gdpc", "wdi_hec", "chga_hinst",
    "bl_asy25mf", "bl_asy15f", "bl_asy15m"
  )
)

# Cross-section demo data (no format argument is passed here).
qog.cs.demo <- qogdata(
  file = "qog.cs.dta",
  variables = c(
    "year", "ccode", "ccodealp", "cname", "unna_pop",
    "wdi_fr", "wdi_gdpc", "wdi_hec", "chga_hinst",
    "bl_asy25mf", "bl_asy15f", "bl_asy15m"
  )
)

# Show the columns of each dataset.
names(qog.ts.demo)
names(qog.cs.demo)

save(qog.ts.demo, qog.cs.demo, file = "data/qog.demo.rda")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment