Note: This post is a summary of information paraphrased from an excellent blog post by Christian Sepulveda.
Create the app and download the necessary dependencies.
| # An addition | |
| 5 + 5 | |
| # A subtraction | |
| 5 - 5 | |
| # A multiplication | |
| 3 * 5 | |
| # A division |
| storage.vector <- NA | |
| # Function that assigns treatment/control depending on | |
| # propensity scores (assignment probabilities) | |
| experiment <- function(vector.of.probabilities = NULL) { | |
| k = 0 | |
| for (i in 1:length(vector.of.probabilities)) { | |
| if( | |
| sample(x = c(1,0), size = 1, prob = c(vector.of.probabilities[i], | |
| 1 - vector.of.probabilities[i])) == 1) { |
| ################ PRELIMINARIES | |
| library(MASS) | |
| data(Pima.tr) | |
| library(tree) | |
| library(randomForest) | |
| ## STEP 1: Logistic regression ## | |
| logistic_reg <- glm(type ~ ., data = Pima.tr, family = binomial) # basic model | |
| predict_logistic.tr <- predict(logistic_reg, type = "response") # predicted probabilities (TRAINING SET) |
| # EXERCISE TO BUILD INTUITION FOR CORRELATED VS. UNCORRELATED DATA | |
| # PLEASE FOCUS ON UNDERSTANDING THE CODE BELOW | |
| ### DO NOT JUST EXECUTE ALL THE CODE IN ONE BATCH--RUN IT LINE BY LINE... | |
| ### Simulation of analysis on correlated data | |
| set.seed(1314) | |
| nsims <- 10000 |
| library(boot) | |
| # Estimate the mean via bootstrapping: returns the mean of the observations selected by index | |
| boot.mean <- function(data,index) return(mean(data[index])) | |
| # Calculate the confidence interval (CI) via the t-distribution | |
| t.dist.ci <- function(samp) { | |
| df <- length(samp) - 1 | |
| factors <- qt(c(0.025, 0.975), df = df) | |
| samp.mean <- mean(samp) |
| library(haven) | |
| library(arm) | |
| df <- read_dta("C:/Users/Vinic/Downloads/turnout.dta") | |
| View(df) | |
| df[1,1] | |
| df[1,] | |
| lm2 <- glm(turnout ~ ., data = df, family = binomial) | |
| summary(lm2) |
| set.seed(20181001) | |
| ### LOOCV | |
| # Load packages and data | |
| library(Matching) | |
| library(boot) | |
| data(lalonde) | |
| # Train your model on ALL the data -- Use glm instead of lm |
| library(haven) | |
| df <- read_dta("C:/Users/Vinic/Downloads/turnout.dta") | |
| View(df) | |
| df[1,1] | |
| df[1,] | |
| lm2 <- glm(turnout ~ ., data = df, family = binomial) | |
| summary(lm2) | |
Create the app and download the necessary dependencies.