briatte · December 16, 2015 20:10
diff --git a/stats.table.example.R b/stats.table.example.R
 require(countrycode)
 require(devtools)
 require(downloader)
 require(pastecs)

 ## GET DATA

 # Fetch the Quality of Government Standard dataset once, convert the Stata
 # file to CSV, and reuse the local CSV copy on every later run.
 file <- "data/qog-cs.txt"
 if (!file.exists(file)) {
   dta <- "data/qog-cs.dta"
   # make sure the target folder exists before anything is written into it
   dir.create(dirname(dta), showWarnings = FALSE, recursive = TRUE)
   if (!file.exists(dta)) {
     url <- "http://www.qogdata.pol.gu.se/data/qog_std_cs.dta"
     download(url, dta, mode = "wb")
   }
   # read.dta() comes from the 'foreign' package, which is not attached by the
   # require() calls above, so it is called through its namespace;
   # row.names = FALSE keeps a meaningless index column out of the cached CSV
   write.csv(foreign::read.dta(dta), file, row.names = FALSE)
 }
 # open local copy
 data <- read.csv(file, stringsAsFactors = FALSE, header = TRUE)

 ## PREPARE DATA

 # extract variables (the column order matters: stats.table() is called with
 # column positions below)
 data <- with(data, data.frame(
   country    = cname,                                       # country name
   ccode      = ccodealp,                                    # ISO-3C codes
   continent  = countrycode(ccodealp, "iso3c", "continent"), # UN continent
   gdpc       = wdi_gdpc / 10^3,                             # in 1,000 USD
   wrights    = ciri_wosoc,
   regime     = gol_polreg))

 # drop missing cases (rows with an NA in any of the extracted columns)
 data <- na.omit(data)

 # Get stats.table() function.
 source_gist("5490761")

 ## EXPORT SUMMARY STATS

 # Example usage:
 # - continuous variables are in columns 4 and 5
 # - categorical variables are in columns 3 and 6
 # - remember to delete missing values first!
 stats.table(data, file = "summary.stats.txt",
             continuous = 4:5, categorical = c(3, 6))
diff --git a/stats.table.R b/stats.table.R
 # Write a tab-separated table of summary statistics to a text file.
 #
 # Arguments:
 #   data         data frame holding the variables to summarise; percentages
 #                are computed over nrow(data), so drop missing values first
 #   continuous   columns (positions or names) summarised as N, mean, SD,
 #                min and max
 #   categorical  columns (positions or names) summarised as one row per
 #                observed value, with its count and percentage
 #   file         path of the text file to write
 #   digits       number of decimals kept when rounding
 #
 # Side effects: writes `file` and prints a confirmation line to the console.
 # Stops with an error if the 'pastecs' package is not installed.
 stats.table <- function(data, continuous, categorical, 
                         file = "stats.txt", digits = 1) {
   # check for the package without attaching it to the caller's search path
   if (!requireNamespace("pastecs", quietly = TRUE)) {
     stop("install.packages('pastecs') first.")
   }

   # continuous summary (n, mean, sd, min, max) of selected variable columns;
   # drop = FALSE keeps a single selected column from collapsing to a vector,
   # which would lose the variable name used as the row label
   con.data <- data[, continuous, drop = FALSE]
   if (ncol(con.data) > 0) {
     con.stats <- t(pastecs::stat.desc(con.data))[, c(1, 9, 13, 4:5), drop = FALSE]
     con.rows <- cbind(rownames(con.stats), round(con.stats, digits))
   } else {
     # no continuous variable requested: keep an empty block of the right width
     con.rows <- matrix(character(0), nrow = 0, ncol = 6)
   }

   # categorical summary (n, frequencies) of selected variable columns: each
   # variable becomes a header row followed by one row per observed value
   cat.data <- data[, categorical, drop = FALSE]
   cat.rows <- do.call(rbind, lapply(seq_along(cat.data), function(i) {
     counts <- table(cat.data[[i]])
     freqs <- data.frame(counts,
                         percent = as.vector(round(100 * counts / nrow(data), digits)))
     rbind(c(names(cat.data)[i], "N", "%"), as.matrix(freqs))
   }))

   # stack both blocks; categorical rows only fill the first three columns, so
   # they are padded with NA (written as empty cells) up to the full width
   out <- con.rows
   if (!is.null(cat.rows)) {
     padding <- matrix(NA, nrow = nrow(cat.rows),
                       ncol = ncol(con.rows) - ncol(cat.rows))
     out <- rbind(out, cbind(cat.rows, padding))
   }

   # simplistic formatting
   rownames(out) <- colnames(out) <- NULL
   write.table(data.frame(out), file = file, 
               na = "", sep = "\t", quote = FALSE, row.names = FALSE, 
               col.names = c("", "N", "Mean/%", "SD", "Min", "Max"))
   cat("Summary statistics written to", file, "\n")
 }
	require(countrycode)
	require(devtools)
	require(downloader)
	require(pastecs)

	## GET DATA

	# Fetch the Quality of Government Standard dataset once, convert the Stata
	# file to CSV, and reuse the local CSV copy on every later run.
	file <- "data/qog-cs.txt"
	if (!file.exists(file)) {
	  dta <- "data/qog-cs.dta"
	  # make sure the target folder exists before anything is written into it
	  dir.create(dirname(dta), showWarnings = FALSE, recursive = TRUE)
	  if (!file.exists(dta)) {
	    url <- "http://www.qogdata.pol.gu.se/data/qog_std_cs.dta"
	    download(url, dta, mode = "wb")
	  }
	  # read.dta() comes from the 'foreign' package, which is not attached by the
	  # require() calls above, so it is called through its namespace;
	  # row.names = FALSE keeps a meaningless index column out of the cached CSV
	  write.csv(foreign::read.dta(dta), file, row.names = FALSE)
	}
	# open local copy
	data <- read.csv(file, stringsAsFactors = FALSE, header = TRUE)

	## PREPARE DATA

	# extract variables (the column order matters: stats.table() is called with
	# column positions below)
	data <- with(data, data.frame(
	  country = cname, # country name
	  ccode = ccodealp, # ISO-3C codes
	  continent = countrycode(ccodealp, "iso3c", "continent"), # UN continent
	  gdpc = wdi_gdpc / 10^3, # in 1,000 USD
	  wrights = ciri_wosoc,
	  regime = gol_polreg))

	# drop missing cases (rows with an NA in any of the extracted columns)
	data <- na.omit(data)

	# Get stats.table() function.
	source_gist("5490761")

	## EXPORT SUMMARY STATS

	# Example usage:
	# - continuous variables are in columns 4 and 5
	# - categorical variables are in columns 3 and 6
	# - remember to delete missing values first!
	stats.table(data, file = "summary.stats.txt",
	            continuous = 4:5, categorical = c(3, 6))
	# Write a tab-separated table of summary statistics to a text file.
	#
	# Arguments:
	#   data         data frame holding the variables to summarise; percentages
	#                are computed over nrow(data), so drop missing values first
	#   continuous   columns (positions or names) summarised as N, mean, SD,
	#                min and max
	#   categorical  columns (positions or names) summarised as one row per
	#                observed value, with its count and percentage
	#   file         path of the text file to write
	#   digits       number of decimals kept when rounding
	#
	# Side effects: writes `file` and prints a confirmation line to the console.
	# Stops with an error if the 'pastecs' package is not installed.
	stats.table <- function(data, continuous, categorical,
	                        file = "stats.txt", digits = 1) {
	  # check for the package without attaching it to the caller's search path
	  if (!requireNamespace("pastecs", quietly = TRUE)) {
	    stop("install.packages('pastecs') first.")
	  }

	  # continuous summary (n, mean, sd, min, max) of selected variable columns;
	  # drop = FALSE keeps a single selected column from collapsing to a vector,
	  # which would lose the variable name used as the row label
	  con.data <- data[, continuous, drop = FALSE]
	  if (ncol(con.data) > 0) {
	    con.stats <- t(pastecs::stat.desc(con.data))[, c(1, 9, 13, 4:5), drop = FALSE]
	    con.rows <- cbind(rownames(con.stats), round(con.stats, digits))
	  } else {
	    # no continuous variable requested: keep an empty block of the right width
	    con.rows <- matrix(character(0), nrow = 0, ncol = 6)
	  }

	  # categorical summary (n, frequencies) of selected variable columns: each
	  # variable becomes a header row followed by one row per observed value
	  cat.data <- data[, categorical, drop = FALSE]
	  cat.rows <- do.call(rbind, lapply(seq_along(cat.data), function(i) {
	    counts <- table(cat.data[[i]])
	    freqs <- data.frame(counts,
	                        percent = as.vector(round(100 * counts / nrow(data), digits)))
	    rbind(c(names(cat.data)[i], "N", "%"), as.matrix(freqs))
	  }))

	  # stack both blocks; categorical rows only fill the first three columns, so
	  # they are padded with NA (written as empty cells) up to the full width
	  out <- con.rows
	  if (!is.null(cat.rows)) {
	    padding <- matrix(NA, nrow = nrow(cat.rows),
	                      ncol = ncol(con.rows) - ncol(cat.rows))
	    out <- rbind(out, cbind(cat.rows, padding))
	  }

	  # simplistic formatting
	  rownames(out) <- colnames(out) <- NULL
	  write.table(data.frame(out), file = file,
	              na = "", sep = "\t", quote = FALSE, row.names = FALSE,
	              col.names = c("", "N", "Mean/%", "SD", "Min", "Max"))
	  cat("Summary statistics written to", file, "\n")
	}