Last active
February 24, 2021 13:24
-
-
Save acarril/270f0fd4e5ae8fff039216c3a9aad764 to your computer and use it in GitHub Desktop.
ECO539B - Homework 3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| setwd("/Users/alvaro/Dropbox/Princeton/2021-Spring/539B/03-IV/hw03") | |
| ### Load libraries | |
| library(tidyverse) | |
| library(haven) # import .dta | |
| library(sandwich) # vcovHC() | |
| library(clubSandwich) # vcovCR() | |
| library(dfadjust) | |
| library(progress) | |
| library(brew) | |
| ### Read and prepare data | |
| fam <- read_dta("famine.dta") | |
| fam <- fam %>% | |
| mutate( | |
| lgrain_pred_fam = lgrain_pred * famine, | |
| lgrain_pred_invfam = lgrain_pred * (1 - famine) | |
| ) | |
| fam_sub <- filter(fam, year >= 1953 & year <= 1965) | |
| ### Compute main regression and extract betas | |
| reg <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = fam) | |
| betas <- coef(reg)[2:3] | |
| reg_sub <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = fam_sub) | |
| betas_sub <- coef(reg_sub)[2:3] | |
| ### (a) No clustering | |
| # (i), (ii) HC standard errors | |
| sigma_hc0 <- diag(sqrt(vcovHC(reg, type = "HC0")[2:3, 2:3])) | |
| sigma_hc1 <- diag(sqrt(vcovHC(reg, type = "HC1")[2:3, 2:3])) | |
| sigma_hc2 <- diag(sqrt(vcovHC(reg, type = "HC2")[2:3, 2:3])) | |
| sigma_hc0_sub <- diag(sqrt(vcovHC(reg_sub, type = "HC0")[2:3, 2:3])) | |
| sigma_hc1_sub <- diag(sqrt(vcovHC(reg_sub, type = "HC1")[2:3, 2:3])) | |
| sigma_hc2_sub <- diag(sqrt(vcovHC(reg_sub, type = "HC2")[2:3, 2:3])) | |
| ### (b) With clustering | |
| # (i), (ii) CR standard errors | |
| sigma_cr0 <- diag(sqrt(vcovCR(reg, cluster = fam$prov, type = "CR0")[2:3, 2:3])) | |
| sigma_cr1 <- diag(sqrt(vcovCR(reg, cluster = fam$prov, type = "CR1")[2:3, 2:3])) | |
| sigma_cr2 <- diag(sqrt(vcovCR(reg, cluster = fam$prov, type = "CR2")[2:3, 2:3])) | |
| sigma_cr0_sub <- diag(sqrt(vcovCR(reg_sub, cluster = fam_sub$prov, type = "CR0")[2:3, 2:3])) | |
| sigma_cr1_sub <- diag(sqrt(vcovCR(reg_sub, cluster = fam_sub$prov, type = "CR1")[2:3, 2:3])) | |
| sigma_cr2_sub <- diag(sqrt(vcovCR(reg_sub, cluster = fam_sub$prov, type = "CR2")[2:3, 2:3])) | |
| ### (iii) Effective standard errors | |
| # to compute the effective degrees of freedom, we use the package by Imbens and Kolesar | |
| # (a) No clustering | |
| reg_adj <- dfadjustSE(reg, IK = F) | |
| df_eff <- reg_adj$coefficients[2:3,5] | |
| sigma_eff <- sigma_hc2 * qt(0.975, df = df_eff) / 1.96 | |
| reg_adj_sub <- dfadjustSE(reg_sub, IK = F) | |
| df_eff_sub <- reg_adj_sub$coefficients[2:3,5] | |
| sigma_eff_sub <- sigma_hc2_sub * qt(0.975, df = df_eff_sub) / 1.96 | |
| # (b) With clustering | |
| reg_cl_adj <- dfadjustSE(reg, clustervar = as.factor(fam$prov), IK = F) | |
| df_cl_eff <- reg_cl_adj$coefficients[2:3,5] | |
| sigma_cl_eff <- sigma_cr2 * qt(0.975, df = df_cl_eff) / 1.96 | |
| reg_cl_adj_sub <- dfadjustSE(reg_sub, clustervar = as.factor(fam_sub$prov), IK = F) | |
| df_cl_eff_sub <- reg_cl_adj_sub$coefficients[2:3,5] | |
| sigma_cl_eff_sub <- sigma_cr2_sub * qt(0.975, df = df_cl_eff_sub) / 1.96 | |
| ### (iv), (v) Boostrap | |
| B <- 50000 | |
| N <- dim(fam)[1] | |
| N_sub <- dim(fam_sub)[1] | |
| provs <- unique(fam$prov) | |
| provs_sub <- unique(fam_sub$prov) | |
| Nclusters <- length(provs) | |
| Nclusters_sub <- length(provs_sub) | |
| bs_estimates <- matrix(data = NA, nrow = B, ncol = 2) | |
| bs_estimates_sub <- matrix(data = NA, nrow = B, ncol = 2) | |
| bs_tstats <- matrix(data = NA, nrow = B, ncol = 2) | |
| bs_tstats_sub <- matrix(data = NA, nrow = B, ncol = 2) | |
| bs_estimates_c <- matrix(data = NA, nrow = B, ncol = 2) | |
| bs_tstats_c <- matrix(data = NA, nrow = B, ncol = 2) | |
| bs_estimates_c_sub <- matrix(data = NA, nrow = B, ncol = 2) | |
| bs_tstats_c_sub <- matrix(data = NA, nrow = B, ncol = 2) | |
| pb <- progress_bar$new(total = B, format = "[:bar] :current/:total (:percent)") | |
| for (b in 1:B){ | |
| pb$tick() | |
| # (a) No clustering | |
| dat <- sample_n(fam, size = N, replace = TRUE) | |
| dat_sub <- sample_n(fam_sub, size = N_sub, replace = TRUE) | |
| bs_reg <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = dat) | |
| bs_reg_sub <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = dat_sub) | |
| for (c in 2:3){ | |
| bs_estimates[b, c-1] <- coef(bs_reg)[c] | |
| bs_estimates_sub[b, c-1] <- coef(bs_reg_sub)[c] | |
| bs_tstats[b, c-1] <- sqrt(N) * (coef(bs_reg)[c] - coef(reg)[c]) / sqrt(vcovHC(bs_reg, type = "HC1")[c, c]) | |
| bs_tstats_sub[b, c-1] <- sqrt(N_sub) * (coef(bs_reg_sub)[c] - coef(reg_sub)[c]) / sqrt(vcovHC(bs_reg_sub, type = "HC1")[c,c]) | |
| } | |
| # (b) With clustering | |
| provb <- sample(provs, size = Nclusters, replace = T) | |
| provb_sub <- sample(provs_sub, size = Nclusters_sub, replace = T) | |
| dat <- filter(fam, prov %in% provb) | |
| dat_sub <- filter(fam_sub, prov %in% provb_sub) | |
| bs_reg <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = dat) | |
| bs_reg_sub <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = dat_sub) | |
| for (c in 2:3){ | |
| bs_estimates_c [b, c-1] <- coef(bs_reg)[c] | |
| bs_estimates_c_sub[b, c-1] <- coef(bs_reg_sub)[c] | |
| bs_tstats_c [b, c-1] <- sqrt(N) * (coef(bs_reg)[c] - coef(reg)[c]) / sqrt(vcovCR(bs_reg, cluster = dat$prov, type = "CR1")[c,c]) | |
| bs_tstats_c_sub [b, c-1] <- sqrt(N_sub) * (coef(bs_reg_sub)[c] - coef(reg_sub)[c]) / sqrt(vcovCR(bs_reg_sub, cluster = dat_sub$prov, type = "CR1")[c, c]) | |
| } | |
| } | |
| # compute se (no cluster) | |
| bs_sigma <- c(sd(bs_estimates[, 1]), sd(bs_estimates[, 2])) | |
| names(bs_sigma) <- names(coef(reg)[2:3]) | |
| bs_sigma_sub <- c(sd(bs_estimates_sub[, 1]), sd(bs_estimates_sub[, 2])) | |
| names(bs_sigma_sub) <- names(coef(reg_sub)[2:3]) | |
| # compute se (cluster) | |
| bs_sigma_cl <- c(sd(bs_estimates_c[,1]), sd(bs_estimates_c[,2])) | |
| names(bs_sigma_cl) <- names(coef(reg)[2:3]) | |
| bs_sigma_cl_sub <- c(sd(bs_estimates_c_sub[,1]), sd(bs_estimates_c_sub[,2])) | |
| names(bs_sigma_cl_sub) <- names(coef(reg_sub)[2:3]) | |
| # confidence intervals | |
| lb_all <- numeric(2) | |
| ub_all <- numeric(2) | |
| lb_sub <- numeric(2) | |
| ub_sub <- numeric(2) | |
| lb_all_c <- numeric(2) | |
| ub_all_c <- numeric(2) | |
| lb_sub_c <- numeric(2) | |
| ub_sub_c <- numeric(2) | |
| for (b in 1:2){ | |
| lb_all [b] = betas[b] - quantile(bs_tstats[,b], probs = 1 - 0.05 / 2) * sigma_hc1[b] / sqrt(N) | |
| ub_all [b] = betas[b] - quantile(bs_tstats[,b], probs = 0.05 / 2) * sigma_hc1[b] / sqrt(N) | |
| lb_sub [b] = betas_sub[b] - quantile(bs_tstats_sub[,b], probs = 1 - 0.05 / 2) * sigma_hc1_sub[b] / sqrt(N_sub) | |
| ub_sub [b] = betas_sub[b] - quantile(bs_tstats_sub[,b], probs = 0.05 / 2) * sigma_hc1_sub[b] / sqrt(N_sub) | |
| lb_all_c[b] = betas[b] - quantile(bs_tstats_c[,b], probs = 1 - 0.05 / 2) * sigma_cr1[b] / sqrt(N) | |
| ub_all_c[b] = betas[b] - quantile(bs_tstats_c[,b], probs = 0.05 / 2) * sigma_cr1[b] / sqrt(N) | |
| lb_sub_c[b] = betas_sub[b] - quantile(bs_tstats_c_sub[,b], probs = 1 - 0.05 / 2) * sigma_cr1_sub[b] / sqrt(N_sub) | |
| ub_sub_c[b] = betas_sub[b] - quantile(bs_tstats_c_sub[,b], probs = 0.05 / 2) * sigma_cr1_sub[b] / sqrt(N_sub) | |
| } | |
| # sink(file = "standard_errors.tex") | |
| # brew("SE_template.brew") | |
| # sink(file = NULL) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment