Jsevillamol · June 22, 2020 18:15
diff --git a/alesina_2013.R b/alesina_2013.R
 library(haven)
 library(dplyr)
 library(lfe)

 # Inline string concatenation
 `%+%` <- function(a, b) paste(a, b, sep="")

 # Moran statistic
 library(ape)
 my_moran <- function(residuals_input, coordinates){
  ww <- as.matrix(dist(coordinates))
  ww <- 1/ww
  diag(ww) <- 0
  ww[which(!is.finite(ww))] <- 0
  
  Moran.I(residuals_input, ww, na.rm = TRUE)
 }

 # Choose variable to run code on and controls
 outcome = "ilf"
 exposure = "plow_m"

 individual_controls = 
  " + age + age_sq + high_sc + less_hs" %+%
  " + my_data$single +  factor(year) + factor(metro)"

 historical_country_controls = 
  " + agricultural_suitability_m + tropical_climate_m + large_animals_m" %+%
  " + economic_complexity_m + political_hierarchies_m"

 contemporary_country_controls = " + ln_income_m + ln_income2m"
  
 controls = 
  " + factor(statefip)"         %+%
  individual_controls           %+%
  historical_country_controls   %+%
  contemporary_country_controls

 # Prepare data
 my_data <- read_dta("R/datasets/alesina_giuliano_nunn_qje_2013_replication_materials/Replication_Materials/cps_dataset.dta")
 my_data <- my_data[complete.cases(pull(my_data, exposure)),]

 ## Add state latitude and longitude
 gaz_counties_national <- read.delim("~/R/datasets/2019_Gaz_counties_national.txt")

 gaz_counties_national$statefip <- floor(gaz_counties_national$GEOID/1000)
 gaz_counties_national <- gaz_counties_national %>% select(c(statefip, INTPTLAT, INTPTLONG))
 gaz_counties_national <- aggregate(gaz_counties_national, by=list(gaz_counties_national$statefip), FUN=mean)
 my_data <- merge(x=my_data, y=gaz_counties_national, by.x="statefip", by.y="statefip", all.x=TRUE)

 # Run regression
 f = outcome %+% " ~ " %+% exposure %+% controls %+% " | 0 | 0 | mbpl" 
 my_lm <- felm(as.formula(f), data=my_data, na.action=na.exclude)

 # Get some regression statistics
 n <- nobs(my_lm)
 R2 <- summary(my_lm)$adj.r.squared

 # Compute standardized beta
 r <- coef(summary(my_lm))[exposure, "Estimate"]
 s <- coef(summary(my_lm))[exposure, "Cluster s.e."]
 p <- coef(summary(my_lm))[exposure, 'Pr(>|t|)']

 sigma_x <- sd(pull(my_data, exposure), na.rm=TRUE)
 sigma_y <- sd(pull(my_data, outcome), na.rm=TRUE)

 cohen_d <- r / sigma_y
 cohen_ds <- s / sigma_y

 r_adjusted <- r * sigma_x / sigma_y
 s_adjusted <- s * sigma_x / sigma_y

 # Compute Moran
 ## Redo OLS without clusters
 f = outcome %+% " ~ " %+% exposure %+% controls
 my_lm <- lm(as.formula(f), data=my_data, na.action=na.exclude)

 ## Add residuals to data
 my_data$my_residuals <- resid(my_lm)

 ## Prune omitted values
 omitted <- c(my_lm$na.action)
 pruned_data <- my_data[-omitted, ]

 ## Select relevant columns
 relevant_data <- pruned_data %>% select(c(INTPTLAT, INTPTLONG, my_residuals))

 ## Aggregate data by ethnicity
 aggdata <-aggregate(relevant_data, by=list(pruned_data$mbpl), FUN=mean, na.rm=TRUE)

 coords <- cbind(aggdata$INTPTLAT, aggdata$INTPTLONG)

 moran_out <- my_moran(aggdata$my_residuals, coords)
 moran_z <- (moran_out$observed - moran_out$expected) / moran_out$sd
 moran_p <- moran_out$p.value

 # Print results
 print(sprintf('outcome = ' %+% outcome))
 print(sprintf('exposure = ' %+% exposure))
 print(sprintf('n = %d', n))
 print(sprintf('r = %.2f (%.2f)', r, s))
 print(sprintf('d = %.2f (%.2f)', cohen_d, cohen_ds))
 print(sprintf('beta = %.2f (%.2f)', r_adjusted, s_adjusted))
 print(sprintf('p = %.2f', p))
 print(sprintf('Adjusted R2 = %.2f', R2))
 print(sprintf("Moran's Z = %.2f", moran_z))
 print(sprintf("Moran's p = %.2f", moran_p))
	library(haven)
	library(dplyr)
	library(lfe)

	# Inline string concatenation
	`%+%` <- function(a, b) paste(a, b, sep="")

	# Moran statistic
	library(ape)
	my_moran <- function(residuals_input, coordinates){
	ww <- as.matrix(dist(coordinates))
	ww <- 1/ww
	diag(ww) <- 0
	ww[which(!is.finite(ww))] <- 0

	Moran.I(residuals_input, ww, na.rm = TRUE)
	}

	# Choose variable to run code on and controls
	outcome = "ilf"
	exposure = "plow_m"

	individual_controls =
	" + age + age_sq + high_sc + less_hs" %+%
	" + my_data$single + factor(year) + factor(metro)"

	historical_country_controls =
	" + agricultural_suitability_m + tropical_climate_m + large_animals_m" %+%
	" + economic_complexity_m + political_hierarchies_m"

	contemporary_country_controls = " + ln_income_m + ln_income2m"

	controls =
	" + factor(statefip)" %+%
	individual_controls %+%
	historical_country_controls %+%
	contemporary_country_controls

	# Prepare data
	my_data <- read_dta("R/datasets/alesina_giuliano_nunn_qje_2013_replication_materials/Replication_Materials/cps_dataset.dta")
	my_data <- my_data[complete.cases(pull(my_data, exposure)),]

	## Add state latitude and longitude
	gaz_counties_national <- read.delim("~/R/datasets/2019_Gaz_counties_national.txt")

	gaz_counties_national$statefip <- floor(gaz_counties_national$GEOID/1000)
	gaz_counties_national <- gaz_counties_national %>% select(c(statefip, INTPTLAT, INTPTLONG))
	gaz_counties_national <- aggregate(gaz_counties_national, by=list(gaz_counties_national$statefip), FUN=mean)
	my_data <- merge(x=my_data, y=gaz_counties_national, by.x="statefip", by.y="statefip", all.x=TRUE)

	# Run regression
	f = outcome %+% " ~ " %+% exposure %+% controls %+% " \| 0 \| 0 \| mbpl"
	my_lm <- felm(as.formula(f), data=my_data, na.action=na.exclude)

	# Get some regression statistics
	n <- nobs(my_lm)
	R2 <- summary(my_lm)$adj.r.squared

	# Compute standardized beta
	r <- coef(summary(my_lm))[exposure, "Estimate"]
	s <- coef(summary(my_lm))[exposure, "Cluster s.e."]
	p <- coef(summary(my_lm))[exposure, 'Pr(>\|t\|)']

	sigma_x <- sd(pull(my_data, exposure), na.rm=TRUE)
	sigma_y <- sd(pull(my_data, outcome), na.rm=TRUE)

	cohen_d <- r / sigma_y
	cohen_ds <- s / sigma_y

	r_adjusted <- r * sigma_x / sigma_y
	s_adjusted <- s * sigma_x / sigma_y

	# Compute Moran
	## Redo OLS without clusters
	f = outcome %+% " ~ " %+% exposure %+% controls
	my_lm <- lm(as.formula(f), data=my_data, na.action=na.exclude)

	## Add residuals to data
	my_data$my_residuals <- resid(my_lm)

	## Prune omitted values
	omitted <- c(my_lm$na.action)
	pruned_data <- my_data[-omitted, ]

	## Select relevant columns
	relevant_data <- pruned_data %>% select(c(INTPTLAT, INTPTLONG, my_residuals))

	## Aggregate data by ethnicity
	aggdata <-aggregate(relevant_data, by=list(pruned_data$mbpl), FUN=mean, na.rm=TRUE)

	coords <- cbind(aggdata$INTPTLAT, aggdata$INTPTLONG)

	moran_out <- my_moran(aggdata$my_residuals, coords)
	moran_z <- (moran_out$observed - moran_out$expected) / moran_out$sd
	moran_p <- moran_out$p.value

	# Print results
	print(sprintf('outcome = ' %+% outcome))
	print(sprintf('exposure = ' %+% exposure))
	print(sprintf('n = %d', n))
	print(sprintf('r = %.2f (%.2f)', r, s))
	print(sprintf('d = %.2f (%.2f)', cohen_d, cohen_ds))
	print(sprintf('beta = %.2f (%.2f)', r_adjusted, s_adjusted))
	print(sprintf('p = %.2f', p))
	print(sprintf('Adjusted R2 = %.2f', R2))
	print(sprintf("Moran's Z = %.2f", moran_z))
	print(sprintf("Moran's p = %.2f", moran_p))