BioSciEconomist · March 7, 2019 14:36
diff --git a/MatchIt demo.R b/MatchIt demo.R
 # *-----------------------------------------------------------------
 # | PROGRAM NAME: MatchIt demo.R
 # | DATE: 3/6/19 
 # | CREATED BY: MATT BOGARD 
 # | PROJECT FILE:         
 # *----------------------------------------------------------------
 # | PURPOSE: demo of R matchit based on: Matching as Nonparametric Preprocessing for
 # | Reducing Model Dependence in Parametric Causal Inference by Ho,Imai,King,and Stuart
 # | Political Analysis (2007) 15:199-236 See also: http://www.biostat.jhsph.edu/~estuart/
 # *----------------------------------------------------------------

 # NOTES: this is just a very short demo of the ps matching process using R MatchIt at a 
 # very high level. Additional data exploration and analysis would be necessary in 
 # a more realistic scenario

 # for additional options and examples (many to one matching, exact matching, optimal matching, 
 # subclassification, sensitivity analysis) see documentation: 
 # https://cran.r-project.org/web/packages/MatchIt/index.html 

 # Data: lalonde - used by Dehejia and Wahba (1999) to evaluate propensity score matching.

 # Ref: Dehejia, Rajeev and Sadek Wahba. 1999.`Causal Effects in Non-Experimental Studies: Re-Evaluating 
 # the Evaluation of Training Programs.'' Journal of the American Statistical Association 94 (448):
 # 1053-1062

 # Descriptions: 

 # treat - an indicator variable for treatment status.
 # age - age in years.
 # educ - years of schooling.
 # black - indicator variable for blacks.
 # hisp- indicator variable for Hispanics.
 # married - indicator variable for martial status.
 # nodegr - indicator variable for high school diploma.
 # re74 - real earnings in 1974.
 # re75 - real earnings in 1975.
 # re78  - real earnings in 1978.

 # settings and packages

 options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation

 library(MatchIt)
 library(dplyr)

 #-----------------------------
 # data loading and customization
 #-----------------------------

 data("lalonde") # load data
 head(lalonde) # check out first few rows

 df <- lalonde # resave data frame to play with

 ### lets do a little customization to this demo data set 

 # convert index to column - because I like my unique identifiers to actually be a column
 mbrid <- rownames(df) # extract identifiers from the rowname index
 df <- cbind(mbrid,df) # add to dataframe as a column
 df$mbrid <- as.character(df$mbrid) # character conversion
 head(df) # check


 #--------------------------------------------
 # assess unmatched data
 #-------------------------------------------

 # what is the raw difference in outcome re78 between treatment and controls
 df%>% 
  group_by(treat) %>%     # group by treatment
  summarize(AVGre78 = mean(re78)) 
 # treatment group appears to have lower income in 1978 (our outcome variable)

 # is this difference significant
 summary(lm(re78 ~  treat, data = df))

 # difference with controls
 summary(lm(re78 ~  treat + age + educ + black + hispan + married + nodegree + re74 + re75, data = df))

 # controlled comparisons using regression reverse the relationship with postive treatment effect
 # on the order of $1548 that is significant p < .05

 # note: regression is a form of matching in and of itself - the difference is related to
 # weighting, see: Mastering 'Metrics (2015) - same assumptions and identification strategy as matching

 # so why matching vs regression? Chris Blattman puts it well: http://chrisblattman.com/2010/10/27/the-cardinal-sin-of-matching/
 # "For causal inference, the most important difference between regression and matching is what 
 # observations count the most.....Matching might make sense if there are observations in your data 
 # that have no business being compared to one another, and in that way produce a better estimate"
 # w/o matching models may extrapolate over range of data that does not have both treatment and control units
 # i.e. lack of common support increases model dependence

 # 1) Matching helps reduce selection bias/confounding (comparing like with like) - create the kind of 
 #   balance or similarity between treatment and controls similar to a RCT 
 # 2) Matching reduces model dependence - results become less sensitive to functional form and model assumptions
 # 3) The matching process separates experimental design from modeling/analysis - more honest & doubly robust characteristics

 # KEY THEORY: 1) propensity scores are a balancing score - sample pre-processing to create similarity between
 #             treatment and controls. It is a characteristic of the OBSERVED SAMPLE not a hypothetical population 
 #             i.e. statistical tests are not relevant for checking balance or validity of propensity score model 
 #             - matching is an exercise in data processing/preprocessing not inference!!!! (Like BULL TV Show)
 #             2) 'conditional independence' - selection on observables, assumes we are controlling for all sources
 #             of bias based on what we can OBSERVE...have to recognize unobservable omitted variables can still bias our results



 #----------------------------------------------
 # match on propensity scores
 #---------------------------------------------


 # reference model  - we really don't need this this is just for reference
 summary(glm(treat ~ age + educ + black + hispan + married + nodegree + re74 + re75, family = "binomial", data = df))


 # match data using nearest neighbor 1:1 greedy matching using logistic regression and caliper = .10 sd(ps)
 set.seed(1234)
 m.out <- matchit(treat ~ age + educ + black + hispan + married + nodegree + re74 + re75, data=df,method= "nearest", ratio=1, 
                 distance= "logit", caliper=0.10, replace= F)

 summary(m.out) # check balance for each iteration above to determine which approach gives the most balance
 # (notice the improvement of matches with caliper = .10 vs .25)


 plot(m.out, type = "hist") # common support of matched groups
 plot(m.out, type = "jitter") # scatter plots of propensity scores common support


 # NOTES: you may consider tradeoffs - you may sacrafice balance among less important variables in order to have a 
 # larger sample size and allow regression with controls (after matching) extrapolate across those lose ends. You 
 # gain statistical power but also increase some sensitivity to functional form

 # if matches are not satisfactory you may consider changing your propensity score model specification.
 # adding higher order interaction terms or considering using machine learning and feeding scores into 
 # Matchit

 ### Obtain matched analysis file
 df1 <- match.data(m.out)


 #-----------------------------------------
 # estimating treatment effects
 #----------------------------------------

 # what is the difference between outcomes for matched data
 df1%>% 
  group_by(treat) %>%     # group by treatment
  summarize(AVGre78 = mean(re78)) 
 # treatment now has much higher value of outcome


 # regression
 summary(lm(re78 ~  treat, data = df1))

 # regression with added controls 
 summary(lm(re78 ~  treat + age + educ + black + hispan + married + nodegree + re74 + re75, data = df1))

 # regression adjustment does not remove much additional bias or improve significance

 # we could do a power analysis to see if reduced n from matching is an issue and get an 
 # idea of what we could do to increase sample size

 #------------------------------------------
 # power analysis
 #-----------------------------------------

 library(pwr)

 # reference: https://www.statmethods.net/stats/power.html

 # n1 and n2 are the sample sizes
 # effect size: d = |Mu1 - Mu2|/sigma
 # Cohen suggests that d values of 0.2, 0.5, and 0.8 represent small, medium, and large effect 

 Mu1 <- mean(df1$re78[df1$treat==1])
 Mu2 <- mean(df1$re78[df1$treat==0])
 sigma <- sd(df1$re78)
 d = abs(Mu1 - Mu2)/sigma

 # with the effects and sample sizes we have how much power do we have:
 pwr.t.test(n = 111 , d = d , sig.level = .05 , power = , type = c("paired")) # power = .38

 # how many pairs would we need to see this effect with 80% power: 
 # note without matching we still only have 
 pwr.t.test(n = , d = d , sig.level = .05 , power = .8 , type = c("paired")) # 309 we only have 111

 #---------------------------------------
 #
 #  examining matched observations
 #
 #---------------------------------------

 # although  "matching does not require pairing observations"...."only the distributions
 # need be matched as closely as possible" in some applications we want to pull pre and post period data for 
 # treatments and controls based on an index date representing the date of treatment for the treatment group. 
 # For control groups we typically 'assign' an index date based on the index date of the unit of observation 
 # they are matched to if they don't already have a plausible index date. Also, if we want to leverage
 # bootstrapped standard errors to make inferences we may want to do paired bootstraps - in these cases
 # we need to identfy 'matched pairs' to get the index date or do paired bootstrap sampling
 # 

 matchids <- data.frame(m.out$match.matrix) # extract matched pairs from Matchit output

 # convert index to column to get treatment id and rename 1st column as the control id
 trtid <- rownames(matchids)
 matchids <- cbind(trtid,matchids)%>%rename(ctrlid = X1)

 # format conversions
 matchids$trtid <- as.character(matchids$trtid)
 matchids$ctrlid <- as.character(matchids$ctrlid)

 # get only treatment data from matched output data to form a treatment cohort file
 trtchrt <- df1%>%filter(treat ==1)%>%rename(pstrt = distance, Y_trt = re78)
 trtchrt$trtid <- trtchrt$mbrid
 trtchrt<- trtchrt%>%select(mbrid,trtid,treat,Y_trt,pstrt)

 # from matchids add the corresponding control id to the treatment cohort file
 trtchrt <- trtchrt%>%left_join(select(matchids,ctrlid,trtid), by = "trtid")

 # get only control data from matched output data to form a contrl cohort file with their ps and id
 ctrlchrt <- df1%>%filter(treat ==0)%>%rename(psctrl = distance, Y_ctrl = re78) # filter controls and rename ps variable
 ctrlchrt$ctrlid <- ctrlchrt$mbrid # create ctrl id
 ctrlchrt <- ctrlchrt%>%select(mbrid,ctrlid,treat,Y_ctrl,psctrl)

 # add the matched control's ps and outcome to the treatment cohort file joining on the ctrlid
 trt_ctrl_chrt <- trtchrt%>%left_join(select(ctrlchrt,ctrlid,psctrl,Y_ctrl), by = "ctrlid")


 # did we conserve our matched pairs from matchedids?

 head(matchids)
 head(trt_ctrl_chrt)

 #--------------------------------------
 # bootstrapped estimates 
 #--------------------------------------

 # the distribution of our outcome is very right skewed - modeling assumptions are based on residuals
 # however residual distributions often mirror the outcome distribution

 hist(df1$re78)
 hist(trt)
 hist(ctrl)
 hist(trt-ctrl)

 # we might consider making inferences based on bootstrapped standard errors

 ### boostrapped estimates of our dummy variable regression above

 library(boot)

 diff <- function(formula, data, indices) {
  d <- data[indices,] # allows boot to select sample 
  fit <- lm(formula, data=d)
  return(coef(fit)[2]) # get the coefficient for treatment (this gives mean differences)
 } 

 # bootstrapping with 1000 replications 
 results <- boot(data=df1, statistic=diff, 
                R=9999, formula=re78 ~  treat)

 # view results
 results 
 plot(results)

 # get 95% confidence interval 
 boot.ci(results, type="bca")


 ### paired bootstrap


 # see also: https://blogs.sas.com/content/iml/2017/07/12/bootstrap-bca-interval.html
 # R documentation: https://cran.r-project.org/web/packages/wBoot/wBoot.pdf 

 library(wBoot)

 trt <- as.vector(trt_ctrl_chrt$Y_trt)
 ctrl <- as.vector(trt_ctrl_chrt$Y_ctrl)

 boot.paired.bca(trt,ctrl, conf.level = 0.95, type="two-sided") # CI


 # boot.paired.bca(trt, ctrl, variable = NULL, null.hyp = 0,
 #                alternative = c("two.sided"),
 #               conf.level = 0.95, type = NULL, R = 9999)


 # roll your own - this is bootstrapping the matched trt and controls
 # note that matching does not necessarily  require pairing observations for analysis 
 # only the distributions of treatments and controls need to be matched as closely as
 # possible (Stuart, 2007)  contrary to Austin PC. Comparing paired vs non-paired statistical 
 # methods of analyses when making inferences about absolute risk reductions in propensity-score 
 # matched samples. Stat Med. 2011;30(11):1292-301.

 # notice how we get very similar results to the boot.paired function above

 trt <- df1$re78[df1$treat==1]
 ctrl <- df1$re78[df1$treat==0]

 t.test(trt, ctrl,paired=FALSE)

 results1= list()


 for(i in 1:9999) {
  d <- randomSample(df1,110)
  diff <- mean(d$re78[d$treat==1]) - mean(d$re78[d$treat==0])
  results1[i] <- diff
  
 }

 results1 <- as.numeric(as.character(unlist(results1))) # convert to numeric

 mean(results1)
 hist(results1)
 sd(results1) # se

 # 95% CI
 l <- mean(trt - ctrl) - 2*sd(results1)
 u <- mean(trt - ctrl) + 2*sd(results1)

 print(cat(l,u,"95% BS CI"))


 # 95% CI based on percentiles
 quantile(results1, c(.025, .975))

 # 90% CI based on percentiles
 quantile(results1, c(.05, .95))
	# *-----------------------------------------------------------------
	# \| PROGRAM NAME: MatchIt demo.R
	# \| DATE: 3/6/19
	# \| CREATED BY: MATT BOGARD
	# \| PROJECT FILE:
	# *----------------------------------------------------------------
	# \| PURPOSE: demo of R matchit based on: Matching as Nonparametric Preprocessing for
	# \| Reducing Model Dependence in Parametric Causal Inference by Ho,Imai,King,and Stuart
	# \| Political Analysis (2007) 15:199-236 See also: http://www.biostat.jhsph.edu/~estuart/
	# *----------------------------------------------------------------

	# NOTES: this is just a very short demo of the ps matching process using R MatchIt at a
	# very high level. Additional data exploration and analysis would be necessary in
	# a more realistic scenario

	# for additional options and examples (many to one matching, exact matching, optimal matching,
	# subclassification, sensitivity analysis) see documentation:
	# https://cran.r-project.org/web/packages/MatchIt/index.html

	# Data: lalonde - used by Dehejia and Wahba (1999) to evaluate propensity score matching.

	# Ref: Dehejia, Rajeev and Sadek Wahba. 1999.`Causal Effects in Non-Experimental Studies: Re-Evaluating
	# the Evaluation of Training Programs.'' Journal of the American Statistical Association 94 (448):
	# 1053-1062

	# Descriptions:

	# treat - an indicator variable for treatment status.
	# age - age in years.
	# educ - years of schooling.
	# black - indicator variable for blacks.
	# hisp- indicator variable for Hispanics.
	# married - indicator variable for martial status.
	# nodegr - indicator variable for high school diploma.
	# re74 - real earnings in 1974.
	# re75 - real earnings in 1975.
	# re78 - real earnings in 1978.

	# settings and packages

	options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation

	library(MatchIt)
	library(dplyr)

	#-----------------------------
	# data loading and customization
	#-----------------------------

	data("lalonde") # load data
	head(lalonde) # check out first few rows

	df <- lalonde # resave data frame to play with

	### lets do a little customization to this demo data set

	# convert index to column - because I like my unique identifiers to actually be a column
	mbrid <- rownames(df) # extract identifiers from the rowname index
	df <- cbind(mbrid,df) # add to dataframe as a column
	df$mbrid <- as.character(df$mbrid) # character conversion
	head(df) # check


	#--------------------------------------------
	# assess unmatched data
	#-------------------------------------------

	# what is the raw difference in outcome re78 between treatment and controls
	df%>%
	group_by(treat) %>% # group by treatment
	summarize(AVGre78 = mean(re78))
	# treatment group appears to have lower income in 1978 (our outcome variable)

	# is this difference significant
	summary(lm(re78 ~ treat, data = df))

	# difference with controls
	summary(lm(re78 ~ treat + age + educ + black + hispan + married + nodegree + re74 + re75, data = df))

	# controlled comparisons using regression reverse the relationship with postive treatment effect
	# on the order of $1548 that is significant p < .05

	# note: regression is a form of matching in and of itself - the difference is related to
	# weighting, see: Mastering 'Metrics (2015) - same assumptions and identification strategy as matching

	# so why matching vs regression? Chris Blattman puts it well: http://chrisblattman.com/2010/10/27/the-cardinal-sin-of-matching/
	# "For causal inference, the most important difference between regression and matching is what
	# observations count the most.....Matching might make sense if there are observations in your data
	# that have no business being compared to one another, and in that way produce a better estimate"
	# w/o matching models may extrapolate over range of data that does not have both treatment and control units
	# i.e. lack of common support increases model dependence

	# 1) Matching helps reduce selection bias/confounding (comparing like with like) - create the kind of
	# balance or similarity between treatment and controls similar to a RCT
	# 2) Matching reduces model dependence - results become less sensitive to functional form and model assumptions
	# 3) The matching process separates experimental design from modeling/analysis - more honest & doubly robust characteristics

	# KEY THEORY: 1) propensity scores are a balancing score - sample pre-processing to create similarity between
	# treatment and controls. It is a characteristic of the OBSERVED SAMPLE not a hypothetical population
	# i.e. statistical tests are not relevant for checking balance or validity of propensity score model
	# - matching is an exercise in data processing/preprocessing not inference!!!! (Like BULL TV Show)
	# 2) 'conditional independence' - selection on observables, assumes we are controlling for all sources
	# of bias based on what we can OBSERVE...have to recognize unobservable omitted variables can still bias our results



	#----------------------------------------------
	# match on propensity scores
	#---------------------------------------------


	# reference model - we really don't need this this is just for reference
	summary(glm(treat ~ age + educ + black + hispan + married + nodegree + re74 + re75, family = "binomial", data = df))


	# match data using nearest neighbor 1:1 greedy matching using logistic regression and caliper = .10 sd(ps)
	set.seed(1234)
	m.out <- matchit(treat ~ age + educ + black + hispan + married + nodegree + re74 + re75, data=df,method= "nearest", ratio=1,
	distance= "logit", caliper=0.10, replace= F)

	summary(m.out) # check balance for each iteration above to determine which approach gives the most balance
	# (notice the improvement of matches with caliper = .10 vs .25)


	plot(m.out, type = "hist") # common support of matched groups
	plot(m.out, type = "jitter") # scatter plots of propensity scores common support


	# NOTES: you may consider tradeoffs - you may sacrafice balance among less important variables in order to have a
	# larger sample size and allow regression with controls (after matching) extrapolate across those lose ends. You
	# gain statistical power but also increase some sensitivity to functional form

	# if matches are not satisfactory you may consider changing your propensity score model specification.
	# adding higher order interaction terms or considering using machine learning and feeding scores into
	# Matchit

	### Obtain matched analysis file
	df1 <- match.data(m.out)


	#-----------------------------------------
	# estimating treatment effects
	#----------------------------------------

	# what is the difference between outcomes for matched data
	df1%>%
	group_by(treat) %>% # group by treatment
	summarize(AVGre78 = mean(re78))
	# treatment now has much higher value of outcome


	# regression
	summary(lm(re78 ~ treat, data = df1))

	# regression with added controls
	summary(lm(re78 ~ treat + age + educ + black + hispan + married + nodegree + re74 + re75, data = df1))

	# regression adjustment does not remove much additional bias or improve significance

	# we could do a power analysis to see if reduced n from matching is an issue and get an
	# idea of what we could do to increase sample size

	#------------------------------------------
	# power analysis
	#-----------------------------------------

	library(pwr)

	# reference: https://www.statmethods.net/stats/power.html

	# n1 and n2 are the sample sizes
	# effect size: d = \|Mu1 - Mu2\|/sigma
	# Cohen suggests that d values of 0.2, 0.5, and 0.8 represent small, medium, and large effect

	Mu1 <- mean(df1$re78[df1$treat==1])
	Mu2 <- mean(df1$re78[df1$treat==0])
	sigma <- sd(df1$re78)
	d = abs(Mu1 - Mu2)/sigma

	# with the effects and sample sizes we have how much power do we have:
	pwr.t.test(n = 111 , d = d , sig.level = .05 , power = , type = c("paired")) # power = .38

	# how many pairs would we need to see this effect with 80% power:
	# note without matching we still only have
	pwr.t.test(n = , d = d , sig.level = .05 , power = .8 , type = c("paired")) # 309 we only have 111

	#---------------------------------------
	#
	# examining matched observations
	#
	#---------------------------------------

	# although "matching does not require pairing observations"...."only the distributions
	# need be matched as closely as possible" in some applications we want to pull pre and post period data for
	# treatments and controls based on an index date representing the date of treatment for the treatment group.
	# For control groups we typically 'assign' an index date based on the index date of the unit of observation
	# they are matched to if they don't already have a plausible index date. Also, if we want to leverage
	# bootstrapped standard errors to make inferences we may want to do paired bootstraps - in these cases
	# we need to identfy 'matched pairs' to get the index date or do paired bootstrap sampling
	#

	matchids <- data.frame(m.out$match.matrix) # extract matched pairs from Matchit output

	# convert index to column to get treatment id and rename 1st column as the control id
	trtid <- rownames(matchids)
	matchids <- cbind(trtid,matchids)%>%rename(ctrlid = X1)

	# format conversions
	matchids$trtid <- as.character(matchids$trtid)
	matchids$ctrlid <- as.character(matchids$ctrlid)

	# get only treatment data from matched output data to form a treatment cohort file
	trtchrt <- df1%>%filter(treat ==1)%>%rename(pstrt = distance, Y_trt = re78)
	trtchrt$trtid <- trtchrt$mbrid
	trtchrt<- trtchrt%>%select(mbrid,trtid,treat,Y_trt,pstrt)

	# from matchids add the corresponding control id to the treatment cohort file
	trtchrt <- trtchrt%>%left_join(select(matchids,ctrlid,trtid), by = "trtid")

	# get only control data from matched output data to form a contrl cohort file with their ps and id
	ctrlchrt <- df1%>%filter(treat ==0)%>%rename(psctrl = distance, Y_ctrl = re78) # filter controls and rename ps variable
	ctrlchrt$ctrlid <- ctrlchrt$mbrid # create ctrl id
	ctrlchrt <- ctrlchrt%>%select(mbrid,ctrlid,treat,Y_ctrl,psctrl)

	# add the matched control's ps and outcome to the treatment cohort file joining on the ctrlid
	trt_ctrl_chrt <- trtchrt%>%left_join(select(ctrlchrt,ctrlid,psctrl,Y_ctrl), by = "ctrlid")


	# did we conserve our matched pairs from matchedids?

	head(matchids)
	head(trt_ctrl_chrt)

	#--------------------------------------
	# bootstrapped estimates
	#--------------------------------------

	# the distribution of our outcome is very right skewed - modeling assumptions are based on residuals
	# however residual distributions often mirror the outcome distribution

	hist(df1$re78)
	hist(trt)
	hist(ctrl)
	hist(trt-ctrl)

	# we might consider making inferences based on bootstrapped standard errors

	### boostrapped estimates of our dummy variable regression above

	library(boot)

	diff <- function(formula, data, indices) {
	d <- data[indices,] # allows boot to select sample
	fit <- lm(formula, data=d)
	return(coef(fit)[2]) # get the coefficient for treatment (this gives mean differences)
	}

	# bootstrapping with 1000 replications
	results <- boot(data=df1, statistic=diff,
	R=9999, formula=re78 ~ treat)

	# view results
	results
	plot(results)

	# get 95% confidence interval
	boot.ci(results, type="bca")


	### paired bootstrap


	# see also: https://blogs.sas.com/content/iml/2017/07/12/bootstrap-bca-interval.html
	# R documentation: https://cran.r-project.org/web/packages/wBoot/wBoot.pdf

	library(wBoot)

	trt <- as.vector(trt_ctrl_chrt$Y_trt)
	ctrl <- as.vector(trt_ctrl_chrt$Y_ctrl)

	boot.paired.bca(trt,ctrl, conf.level = 0.95, type="two-sided") # CI


	# boot.paired.bca(trt, ctrl, variable = NULL, null.hyp = 0,
	# alternative = c("two.sided"),
	# conf.level = 0.95, type = NULL, R = 9999)


	# roll your own - this is bootstrapping the matched trt and controls
	# note that matching does not necessarily require pairing observations for analysis
	# only the distributions of treatments and controls need to be matched as closely as
	# possible (Stuart, 2007) contrary to Austin PC. Comparing paired vs non-paired statistical
	# methods of analyses when making inferences about absolute risk reductions in propensity-score
	# matched samples. Stat Med. 2011;30(11):1292-301.

	# notice how we get very similar results to the boot.paired function above

	trt <- df1$re78[df1$treat==1]
	ctrl <- df1$re78[df1$treat==0]

	t.test(trt, ctrl,paired=FALSE)

	results1= list()


	for(i in 1:9999) {
	d <- randomSample(df1,110)
	diff <- mean(d$re78[d$treat==1]) - mean(d$re78[d$treat==0])
	results1[i] <- diff

	}

	results1 <- as.numeric(as.character(unlist(results1))) # convert to numeric

	mean(results1)
	hist(results1)
	sd(results1) # se

	# 95% CI
	l <- mean(trt - ctrl) - 2*sd(results1)
	u <- mean(trt - ctrl) + 2*sd(results1)

	print(cat(l,u,"95% BS CI"))


	# 95% CI based on percentiles
	quantile(results1, c(.025, .975))

	# 90% CI based on percentiles
	quantile(results1, c(.05, .95))