José A. Alvarez Cabrera josealvarez97

Prerequisites

Create the app and download the necessary dependencies.

	# Initialise storage.vector with a single NA placeholder.
	# NOTE(review): nothing in the visible part of this snippet reads or fills
	# it -- confirm where it is used.
	storage.vector <- NA

	# Function that assigns treatment/control depending on
	# propensity scores (assignment probabilities)
	experiment <- function(vector.of.probabilities = NULL) {
	k = 0
	for (i in 1:length(vector.of.probabilities)) {
	if(
	sample(x = c(1,0), size = 1, prob = c(vector.of.probabilities[i],
	1 - vector.of.probabilities[i])) == 1) {

	################ PRELIMINARIES
	# Attach MASS first so its Pima.tr data set can be loaded, then attach the
	# tree and randomForest packages (neither is used in the lines shown here).
	library(MASS)
	data(Pima.tr)
	library(tree)
	library(randomForest)

	## STEP 1: Logistic regression ##
	# Baseline model: regress `type` on every other column of Pima.tr.
	logistic_reg <- glm(
	  formula = type ~ .,
	  family = binomial,
	  data = Pima.tr
	)

	# Predicted probabilities for the observations the model was fitted on
	# (TRAINING SET): no new data is supplied, and type = "response" puts the
	# predictions on the probability scale rather than the link scale.
	predict_logistic.tr <- predict(object = logistic_reg, type = "response")

	# EXERCISE TO BUILD INTUITION FOR CORRELATED VS. UNCORRELATED DATA
	# PLEASE FOCUS ON UNDERSTANDING THE CODE BELOW
	### DO NOT JUST EXECUTE ALL THE CODE IN ONE BATCH--RUN IT LINE BY LINE...

	### Simulation of analysis on correlated data

	# Attach the bootstrap package before any simulation code runs.
	library(boot)

	# Fixed seed so repeated runs draw the same random numbers.
	set.seed(1314)

	# Replicate count; presumably the number of simulated data sets -- confirm
	# against the code that consumes it.
	nsims <- 1e4

	# Bootstrap statistic: the mean of the observations picked out by `index`.
	# The (data, index) argument order matches the statistic(data, indices)
	# form that boot::boot() calls.
	boot.mean <- function(data, index) {
	  resampled <- data[index]
	  mean(resampled)
	}

	#calculate the CI via t-distribution
	t.dist.ci <- function(samp) {
	df <- length(samp) - 1
	factors <- qt(c(0.025, 0.975), df = df)
	samp.mean <- mean(samp)

	library(haven)
	library(arm)

	# Location of the Stata turnout file. This is a machine-specific absolute
	# path; edit it here (one place) to point at your own copy of the data.
	turnout_path <- "C:/Users/Vinic/Downloads/turnout.dta"
	df <- read_dta(turnout_path)

	# View() opens a GUI data viewer, so only call it in an interactive
	# session; batch / Rscript runs skip it instead of failing or flashing a
	# window.
	if (interactive()) {
	  View(df)
	}

	# Quick look at the first cell and the first row.
	df[1, 1]
	df[1, ]

	# Logistic regression of turnout on every other column in the data.
	# NOTE(review): the object is named lm2 but it is a binomial glm, not an lm.
	lm2 <- glm(turnout ~ ., data = df, family = binomial)
	summary(lm2)


	### LOOCV

	# Attach Matching, then boot, and load the lalonde data set.
	library(Matching)
	library(boot)
	data(lalonde)

	# Seed the RNG so results that depend on random draws are reproducible.
	set.seed(20181001)

	# Train your model on ALL the data -- Use glm instead of lm