bschneidr · November 11, 2023 10:19 · markrodonovan · Nov 11, 2023
diff --git a/svy_prop_with_wilson_ci.R b/svy_prop_with_wilson_ci.R
 #' @title Wilson's confidence interval for complex survey designs
 #' @description Calculate Wilson's confidence interval for a proportion,
 #' with the effective sample size determined using a design-unbiased
 #' estimate of the complex survey design effect.
 #'
 #' @param x A formula, vector, or matrix.
 #' @param design A survey.design or svyrep.design object
 #' @param na.rm Should cases with missing values be dropped?
 #' @param level The confidence level required
 #' @param ... Additional arguments to pass on to \code{svymean()}
 #'
 #' @return An object of class 'svyciprop'. 
 #' Use \code{as.numeric(coef(result))} to extract the point estimates,
 #' and use \code{confint()} to extract the confidence intervals.
 #' The variance-covariance matrix, standard errors, and design effects
 #' can be extracted using the usual methods.
 #' @export
 #'
 #' @examples
 #' library(survey)
 #' 
 #' data(api)
 #' dclus1 <- svydesign(id=~dnum, fpc=~fpc, data=apiclus1)
 #' 
 #' estimates <- svy_prop_with_wilson_ci(x = ~ stype,
 #'                                      na.rm = TRUE,
 #'                                      design = dclus1,
 #'                                      level = 0.95)
 #' confint(estimates)
 #' as.numeric(coef(estimates))
 #' SE(estimates)
 #' vcov(estimates)
 #' deff(estimates)
 #'
 #' # Example with `svyby()`
 #' svyby(~ awards, by = ~ stype, design = dclus1,
 #'       FUN = svy_prop_with_wilson_ci, vartype = c("ci", "se"))
 #'      
 svy_prop_with_wilson_ci <- function(x, design, na.rm = TRUE, level = 0.95, ...) {

  # Point estimates, standard errors, and design effects
  estimate <- survey::svymean(x = x, na.rm = na.rm,
                              design = design,
                              ...)
  
  # Get estimated proportions
  p_hat <- coef(estimate)
  q_hat <- 1 - p_hat
  
  # Estimate effective sample size
  n_eff <- (p_hat*q_hat)/diag(vcov(estimate))
  # Estimate effective number of successes
  n_successes_eff <- p_hat * n_eff
  if (n_eff == 0) {
    stop("Estimated effective sample size is '0' due to estimated proportion of 0")
  }
  
  # Determine z statistic to use
  alpha <- (1 - level)
  half_alpha <- alpha/2
  z <- qnorm(p = half_alpha, lower.tail = FALSE)
  
  # Calculate confidence interval
  center <- (n_successes_eff + (z^2)/2)/(n_eff + (z^2))
  width <- (z * sqrt(n_eff))/(n_eff + (z^2))
  width <- width * sqrt((p_hat*q_hat) + (z^2)/(4*n_eff))
  
  lower <- center - width
  upper <- center + width
  ci <- rbind(lower, upper)
  rownames(ci) <- paste(round(c(half_alpha, level + half_alpha) * 
                                100, 1), "%", sep = "")
  colnames(ci) <- NULL
  
  # Wrap up the result as an object of class 'svyciprop'
  # so that S3 methods from the survey package can be used
  # (e.g. print(), confint(), SE(), deff(), etc.)
  result <- p_hat
  attr(result, "var") <- vcov(estimate)
  attr(result, 'deff') <- attr(estimate, 'deff')
  attr(result, 'ci') <- ci
  class(result) <- 'svyciprop'
  return(result)
 }
	#' @title Wilson's confidence interval for complex survey designs
	#' @description Calculate Wilson's confidence interval for a proportion,
	#' with the effective sample size determined using a design-unbiased
	#' estimate of the complex survey design effect.
	#'
	#' @param x A formula, vector, or matrix.
	#' @param design A survey.design or svyrep.design object
	#' @param na.rm Should cases with missing values be dropped?
	#' @param level The confidence level required
	#' @param ... Additional arguments to pass on to \code{svymean()}
	#'
	#' @return An object of class 'svyciprop'.
	#' Use \code{as.numeric(coef(result))} to extract the point estimates,
	#' and use \code{confint()} to extract the confidence intervals.
	#' The variance-covariance matrix, standard errors, and design effects
	#' can be extracted using the usual methods.
	#' @export
	#'
	#' @examples
	#' library(survey)
	#'
	#' data(api)
	#' dclus1 <- svydesign(id=~dnum, fpc=~fpc, data=apiclus1)
	#'
	#' estimates <- svy_prop_with_wilson_ci(x = ~ stype,
	#' na.rm = TRUE,
	#' design = dclus1,
	#' level = 0.95)
	#' confint(estimates)
	#' as.numeric(coef(estimates))
	#' SE(estimates)
	#' vcov(estimates)
	#' deff(estimates)
	#'
	#' # Example with `svyby()`
	#' svyby(~ awards, by = ~ stype, design = dclus1,
	#' FUN = svy_prop_with_wilson_ci, vartype = c("ci", "se"))
	#'
	svy_prop_with_wilson_ci <- function(x, design, na.rm = TRUE, level = 0.95, ...) {

	# Point estimates, standard errors, and design effects
	estimate <- survey::svymean(x = x, na.rm = na.rm,
	design = design,
	...)

	# Get estimated proportions
	p_hat <- coef(estimate)
	q_hat <- 1 - p_hat

	# Estimate effective sample size
	n_eff <- (p_hat*q_hat)/diag(vcov(estimate))
	# Estimate effective number of successes
	n_successes_eff <- p_hat * n_eff
	if (n_eff == 0) {
	stop("Estimated effective sample size is '0' due to estimated proportion of 0")
	}

	# Determine z statistic to use
	alpha <- (1 - level)
	half_alpha <- alpha/2
	z <- qnorm(p = half_alpha, lower.tail = FALSE)

	# Calculate confidence interval
	center <- (n_successes_eff + (z^2)/2)/(n_eff + (z^2))
	width <- (z * sqrt(n_eff))/(n_eff + (z^2))
	width <- width * sqrt((p_hatq_hat) + (z^2)/(4n_eff))

	lower <- center - width
	upper <- center + width
	ci <- rbind(lower, upper)
	rownames(ci) <- paste(round(c(half_alpha, level + half_alpha) *
	100, 1), "%", sep = "")
	colnames(ci) <- NULL

	# Wrap up the result as an object of class 'svyciprop'
	# so that S3 methods from the survey package can be used
	# (e.g. print(), confint(), SE(), deff(), etc.)
	result <- p_hat
	attr(result, "var") <- vcov(estimate)
	attr(result, 'deff') <- attr(estimate, 'deff')
	attr(result, 'ci') <- ci
	class(result) <- 'svyciprop'
	return(result)
	}