ngreifer · July 9, 2024 20:25 · thomasklausch2 · Dec 20, 2020 · ngreifer · Dec 20, 2020
diff --git a/subclass_split.R b/subclass_split.R
 # Implements the subclass splitting algorithm described by Imbens & Rubin (2015, Sec 13.5)
 #
 # Starting from one subclass holding every unit, a subclass is split at its median
 # propensity score when (a) both halves keep at least `minn` units of each treatment
 # group and (b) the pooled-variance two-sample t-statistic comparing the propensity
 # scores of the two treatment groups inside the subclass exceeds `tmax`. Passes over
 # the subclasses are repeated until a full pass makes no split.
 #
 # Arguments:
 # - ps: a vector of (linearized) propensity scores; must not contain missing values
 # - z: a vector of treatment status, same length as ps (2 values, doesn't have to be
 #      0/1); must not contain missing values
 # - tmax: the threshold of the t-statistic used to determine whether imbalance remains and
 #         a split should be formed. High values make splits less likely.
 # - minn: the minimum number of units of each treatment group allowed in each subclass
 # - focal: the treatment group where the subclass-wise median ps is computed; leave
 #          NULL to use the full sample. If supplied it must be a value present in z.
 #
 # Output: a factor variable containing subclass membership. The levels are "1", ..., "K"
 #         for the K subclasses formed ("1" only when no split is made). Labels follow
 #         the order in which subclasses were created, not the order of the scores.
 #
 # Errors: stops if ps and z differ in length, if either contains missing values, or
 #         if focal is supplied but does not occur in z.

 subclass_split <- function(ps, z, tmax = 1, minn = 3, focal = NULL) {

   if (length(ps) != length(z)) {
     stop("`ps` and `z` must have the same length.", call. = FALSE)
   }
   if (anyNA(ps) || anyNA(z)) {
     stop("`ps` and `z` must not contain missing values.", call. = FALSE)
   }
   if (!is.null(focal) && !any(z == focal)) {
     stop("`focal` must be one of the values in `z`.", call. = FALSE)
   }

   s <- rep(1, length(z))

   # The first observed treatment value is the reference group; every unit with a
   # different value belongs to the other group.
   zz <- z[1]
   is_ref <- z == zz

   # Label that the next newly created subclass will receive.
   next_label <- 2

   improved <- TRUE

   while (improved) {

     improved <- FALSE

     # The range deliberately includes `next_label`: if a split happens during this
     # pass, the subclass created under that label is examined in the same pass.
     # Labels that are still unused are skipped below.
     for (i in seq_len(next_label)) {
       in_sub <- s == i
       if (!any(in_sub)) next

       m <- if (is.null(focal)) median(ps[in_sub]) else median(ps[in_sub & z == focal])

       # No focal units in this subclass: there is no split point to use.
       if (is.na(m)) next

       upper <- in_sub & ps >= m
       lower <- in_sub & !upper

       #Check subclass size
       counts <- c(sum(lower & is_ref),
                   sum(upper & is_ref),
                   sum(lower & !is_ref),
                   sum(upper & !is_ref))

       # Both halves must be non-empty as well; otherwise the "split" would only
       # relabel the subclass and the loop could never finish when minn <= 0.
       if (!any(lower) || !any(upper) || !all(counts >= minn)) next

       #Check independence
       ps0 <- ps[in_sub & is_ref]
       ps1 <- ps[in_sub & !is_ref]
       l0 <- mean(ps0)
       l1 <- mean(ps1)
       v <- (1 / (sum(in_sub) - 2)) * (sum((ps0 - l0)^2) + sum((ps1 - l1)^2))
       t_stat <- abs(l1 - l0) / sqrt(v * (1 / length(ps0) + 1 / length(ps1)))

       # A missing statistic (e.g. one treatment group absent from the subclass)
       # gives no evidence of imbalance, so the subclass is left intact.
       if (is.na(t_stat) || t_stat <= tmax) next

       s[upper] <- next_label
       next_label <- next_label + 1
       improved <- TRUE
     }
   }

   # Every label in 1..(next_label - 1) is in use, so factor() yields exactly the
   # levels "1", ..., "K" (and just "1" when no split was made).
   factor(s)
 }
	# Implements the subclass splitting algorithm described by Imbens & Rubin (2015, Sec 13.5)
	#
	# Starting from one subclass holding every unit, a subclass is split at its median
	# propensity score when (a) both halves keep at least `minn` units of each treatment
	# group and (b) the pooled-variance two-sample t-statistic comparing the propensity
	# scores of the two treatment groups inside the subclass exceeds `tmax`. Passes over
	# the subclasses are repeated until a full pass makes no split.
	#
	# Arguments:
	# - ps: a vector of (linearized) propensity scores; must not contain missing values
	# - z: a vector of treatment status, same length as ps (2 values, doesn't have to be
	#      0/1); must not contain missing values
	# - tmax: the threshold of the t-statistic used to determine whether imbalance remains and
	#         a split should be formed. High values make splits less likely.
	# - minn: the minimum number of units of each treatment group allowed in each subclass
	# - focal: the treatment group where the subclass-wise median ps is computed; leave
	#          NULL to use the full sample. If supplied it must be a value present in z.
	#
	# Output: a factor variable containing subclass membership. The levels are "1", ..., "K"
	#         for the K subclasses formed ("1" only when no split is made). Labels follow
	#         the order in which subclasses were created, not the order of the scores.
	#
	# Errors: stops if ps and z differ in length, if either contains missing values, or
	#         if focal is supplied but does not occur in z.

	subclass_split <- function(ps, z, tmax = 1, minn = 3, focal = NULL) {

	  if (length(ps) != length(z)) {
	    stop("`ps` and `z` must have the same length.", call. = FALSE)
	  }
	  if (anyNA(ps) || anyNA(z)) {
	    stop("`ps` and `z` must not contain missing values.", call. = FALSE)
	  }
	  if (!is.null(focal) && !any(z == focal)) {
	    stop("`focal` must be one of the values in `z`.", call. = FALSE)
	  }

	  s <- rep(1, length(z))

	  # The first observed treatment value is the reference group; every unit with a
	  # different value belongs to the other group.
	  zz <- z[1]
	  is_ref <- z == zz

	  # Label that the next newly created subclass will receive.
	  next_label <- 2

	  improved <- TRUE

	  while (improved) {

	    improved <- FALSE

	    # The range deliberately includes `next_label`: if a split happens during this
	    # pass, the subclass created under that label is examined in the same pass.
	    # Labels that are still unused are skipped below.
	    for (i in seq_len(next_label)) {
	      in_sub <- s == i
	      if (!any(in_sub)) next

	      m <- if (is.null(focal)) median(ps[in_sub]) else median(ps[in_sub & z == focal])

	      # No focal units in this subclass: there is no split point to use.
	      if (is.na(m)) next

	      upper <- in_sub & ps >= m
	      lower <- in_sub & !upper

	      #Check subclass size
	      counts <- c(sum(lower & is_ref),
	                  sum(upper & is_ref),
	                  sum(lower & !is_ref),
	                  sum(upper & !is_ref))

	      # Both halves must be non-empty as well; otherwise the "split" would only
	      # relabel the subclass and the loop could never finish when minn <= 0.
	      if (!any(lower) || !any(upper) || !all(counts >= minn)) next

	      #Check independence
	      ps0 <- ps[in_sub & is_ref]
	      ps1 <- ps[in_sub & !is_ref]
	      l0 <- mean(ps0)
	      l1 <- mean(ps1)
	      v <- (1 / (sum(in_sub) - 2)) * (sum((ps0 - l0)^2) + sum((ps1 - l1)^2))
	      t_stat <- abs(l1 - l0) / sqrt(v * (1 / length(ps0) + 1 / length(ps1)))

	      # A missing statistic (e.g. one treatment group absent from the subclass)
	      # gives no evidence of imbalance, so the subclass is left intact.
	      if (is.na(t_stat) || t_stat <= tmax) next

	      s[upper] <- next_label
	      next_label <- next_label + 1
	      improved <- TRUE
	    }
	  }

	  # Every label in 1..(next_label - 1) is in use, so factor() yields exactly the
	  # levels "1", ..., "K" (and just "1" when no split was made).
	  factor(s)
	}