Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Created October 28, 2020 13:47
Show Gist options
  • Save explodecomputer/8eab43b782c778d05566042dae698906 to your computer and use it in GitHub Desktop.
Save explodecomputer/8eab43b782c778d05566042dae698906 to your computer and use it in GitHub Desktop.
simple_selection_sim
# sample size
n <- 10000
# simulate x and y variables - no association
x <- rnorm(n)
y <- rnorm(n)
# selected into the sample is influenced by x and y
sel <- rbinom(n, 1, plogis(x + y))
# create weights using x and y
prob <- fitted.values(glm(sel ~ x + y, family="binomial"))
summary(lm(y ~ x))
summary(lm(y ~ x, subset=sel==1))
summary(lm(y ~ x, subset=sel==1, weight=1/prob))
# create weights using only x
# i.e. assume we don't know what the influence of y is on being selected
prob <- fitted.values(glm(sel ~ x, family="binomial"))
summary(lm(y ~ x, subset=sel==1, weight=1/prob))
# weighted mean
mean(x)
mean(x[sel==1])
w <- fitted.values(glm(sel ~ x, family="binomial"))
weighted.mean(x[sel==1], w=1/w[sel==1])
w <- fitted.values(glm(sel ~ x + y, family="binomial"))
weighted.mean(x[sel==1], w=1/w[sel==1])
# weighted mean of subgroups
# binarise y for simplicity
yb <- as.numeric(y > mean(y))
mean(x[yb==1])
mean(x[yb==0])
# in selected
mean(x[sel==1 & yb==1])
mean(x[sel==1 & yb==0])
# create weight using just x
w <- fitted.values(glm(sel ~ x, family="binomial"))
weighted.mean(x[sel==1 & yb==1], w=1/w[sel==1 & yb==1])
weighted.mean(x[sel==1 & yb==0], w=1/w[sel==1 & yb==0])
# create weight using x and y
w <- fitted.values(glm(sel ~ x + y, family="binomial"))
weighted.mean(x[sel==1 & yb==1], w=1/w[sel==1 & yb==1])
weighted.mean(x[sel==1 & yb==0], w=1/w[sel==1 & yb==0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment