soodoku · May 31, 2015 22:52
diff --git a/prop_weights.R b/prop_weights.R
 "
 Weighting by Propensity Scores
 Last Edited: 5/31/2015

 Task Outline:
 1. Two datasets:
 	dataset 1: large pop. representative sample	
 	dataset 2: convenience sample
 2. Create weights for dataset 2 so that its marginals are close to dataset 1 on some vars.  

 How to:
 1. Standardize coding of variables you want to match on across datasets
 	- You can create a separate dummy variable for missing values for each variable in each dataset
 2. Follow steps below 
 "

 # Get some data
 # anes04 comes from anesrake; it is used below as the population sample (pop = 1).
 library(anesrake)
 data(anes04)
 # voteincome comes from the Zelig package (the original spelling "Zeling" is
 # not a package name, so library() failed); used below as the pop = 0 sample.
 library(Zelig)
 data(voteincome)

 "
 Picking common cols + making data similar + add a column identifying which.data
 "

 # Population sample: keep the three matching variables from anes04 and
 # rename the third one (educcats) to the shared name "education".
 data1 <- anes04[, c("female", "age", "educcats")]
 colnames(data1)[3] <- "education"
 # Fold education code 5 into code 4 (presumably so both samples share one
 # education coding -- verify against the codebooks).
 data1$education <- replace(data1$education, data1$education == 5, 4)
 # pop = 1 marks rows that come from the population sample
 data1 <- data.frame(pop = 1, data1)

 # Second sample: same three variables from voteincome, marked with pop = 0
 data2 <- data.frame(pop = 0, voteincome[, c("female", "age", "education")])

 # Stack both samples into one data frame; pop tells them apart
 data <- rbind(data1, data2)

 # Fit Logistic
 # The outcome is membership in the pop == 0 sample; the right-hand side is
 # where care is needed (natural spline in age interacted with education).
 library(splines)
 est <- glm(
   I(pop == 0) ~ female + ns(age, 3) * as.factor(education),
   family = binomial(link = "logit"),
   data = data
 )

 # Get prop. scores
 # These are the fitted values on the link (log-odds) scale, not probabilities.
 data$prop.scores <- predict(est, type = "link")

 # summary(data$prop.scores)

 # Stratify on the score. The cut points are the quintiles of the score among
 # the pop == 1 rows only; rows whose score falls outside that range get an
 # NA stratum from cut().
 pop.quantiles <- quantile(
   with(data, prop.scores[pop == 1]),
   probs = seq(0, 1, by = 0.2)
 )
 data$quantile <- cut(data$prop.scores, breaks = pop.quantiles, include.lowest = TRUE)

 # Within-sample share of rows per stratum (each pop column sums to 1)
 tbl <- prop.table(xtabs(~ quantile + pop, data = data), margin = 2)
 # Per stratum: share among pop == 0 rows divided by share among pop == 1 rows
 mult <- tbl[, "0"] / tbl[, "1"]

 # Create weights: pop == 0 rows keep weight 1, pop == 1 rows get the
 # multiplier of their stratum.
 # NOTE(review): this reweights the pop == 1 rows toward the pop == 0 sample,
 # whereas the task outline at the top of the file talks about weighting
 # dataset 2 toward dataset 1 -- confirm which direction is intended.
 data$new.weight <- ifelse(data$pop != 1, 1, mult[as.integer(data$quantile)])

 # Checking how the new weights work
 # Each line prints the coefficient on pop from a regression of one covariate
 # on pop, with and without the new weights. Since pop is 0/1 this is the
 # (weighted) difference in means between the two samples; closer to zero
 # means better balance. The numbers in the comments are the outputs recorded
 # in the original script.
 # The argument is spelled out as `weights` and the coefficient is taken with
 # coef() by name, instead of relying on partial matching (`weight=`, `$coef`).

 # Age
 coef(lm(age ~ pop, data = data, weights = new.weight))[["pop"]]
 # -0.8366
 coef(lm(age ~ pop, data = data))[["pop"]]
 #  -1.9891

 # Sex
 coef(lm(female ~ pop, data = data, weights = new.weight))[["pop"]]
 # 0.009970668
 coef(lm(female ~ pop, data = data))[["pop"]]
 #  -0.02633

 # Education
 coef(lm(education ~ pop, data = data, weights = new.weight))[["pop"]]
 # [1] 0.05121831
 coef(lm(education ~ pop, data = data))[["pop"]]
 # [1] 0.2702838
	"
	Weighting by Propensity Scores
	Last Edited: 5/31/2015

	Task Outline:
	1. Two datasets:
	dataset 1: large pop. representative sample
	dataset 2: convenience sample
	2. Create weights for dataset 2 so that its marginals are close to dataset 1 on some vars.

	How to:
	1. Standardize coding of variables you want to match on across datasets
	- You can create a separate dummy variable for missing values for each variable in each dataset
	2. Follow steps below
	"

	# Get some data
	# anes04 comes from anesrake; it is used below as the population sample (pop = 1).
	library(anesrake)
	data(anes04)
	# voteincome comes from the Zelig package (the original spelling "Zeling" is
	# not a package name, so library() failed); used below as the pop = 0 sample.
	library(Zelig)
	data(voteincome)

	"
	Picking common cols + making data similar + add a column identifying which.data
	"

	# Population sample: keep the three matching variables from anes04 and
	# rename the third one (educcats) to the shared name "education".
	data1 <- anes04[, c("female", "age", "educcats")]
	colnames(data1)[3] <- "education"
	# Fold education code 5 into code 4 (presumably so both samples share one
	# education coding -- verify against the codebooks).
	data1$education <- replace(data1$education, data1$education == 5, 4)
	# pop = 1 marks rows that come from the population sample
	data1 <- data.frame(pop = 1, data1)

	# Second sample: same three variables from voteincome, marked with pop = 0
	data2 <- data.frame(pop = 0, voteincome[, c("female", "age", "education")])

	# Stack both samples into one data frame; pop tells them apart
	data <- rbind(data1, data2)

	# Fit Logistic
	# The outcome is membership in the pop == 0 sample; the right-hand side is
	# where care is needed (natural spline in age interacted with education).
	library(splines)
	est <- glm(
	  I(pop == 0) ~ female + ns(age, 3) * as.factor(education),
	  family = binomial(link = "logit"),
	  data = data
	)

	# Get prop. scores
	# These are the fitted values on the link (log-odds) scale, not probabilities.
	data$prop.scores <- predict(est, type = "link")

	# summary(data$prop.scores)

	# Stratify on the score. The cut points are the quintiles of the score among
	# the pop == 1 rows only; rows whose score falls outside that range get an
	# NA stratum from cut().
	pop.quantiles <- quantile(
	  with(data, prop.scores[pop == 1]),
	  probs = seq(0, 1, by = 0.2)
	)
	data$quantile <- cut(data$prop.scores, breaks = pop.quantiles, include.lowest = TRUE)

	# Within-sample share of rows per stratum (each pop column sums to 1)
	tbl <- prop.table(xtabs(~ quantile + pop, data = data), margin = 2)
	# Per stratum: share among pop == 0 rows divided by share among pop == 1 rows
	mult <- tbl[, "0"] / tbl[, "1"]

	# Create weights: pop == 0 rows keep weight 1, pop == 1 rows get the
	# multiplier of their stratum.
	# NOTE(review): this reweights the pop == 1 rows toward the pop == 0 sample,
	# whereas the task outline at the top of the file talks about weighting
	# dataset 2 toward dataset 1 -- confirm which direction is intended.
	data$new.weight <- ifelse(data$pop != 1, 1, mult[as.integer(data$quantile)])

	# Checking how the new weights work
	# Each line prints the coefficient on pop from a regression of one covariate
	# on pop, with and without the new weights. Since pop is 0/1 this is the
	# (weighted) difference in means between the two samples; closer to zero
	# means better balance. The numbers in the comments are the outputs recorded
	# in the original script.
	# The argument is spelled out as `weights` and the coefficient is taken with
	# coef() by name, instead of relying on partial matching (`weight=`, `$coef`).

	# Age
	coef(lm(age ~ pop, data = data, weights = new.weight))[["pop"]]
	# -0.8366
	coef(lm(age ~ pop, data = data))[["pop"]]
	# -1.9891

	# Sex
	coef(lm(female ~ pop, data = data, weights = new.weight))[["pop"]]
	# 0.009970668
	coef(lm(female ~ pop, data = data))[["pop"]]
	# -0.02633

	# Education
	coef(lm(education ~ pop, data = data, weights = new.weight))[["pop"]]
	# [1] 0.05121831
	coef(lm(education ~ pop, data = data))[["pop"]]
	# [1] 0.2702838