BioSciEconomist · January 6, 2022 20:23
diff --git a/ex EValues.R b/ex EValues.R
 # *-----------------------------------------------------------------
 # | PROGRAM NAME: ex EValues.R
 # | DATE: 1/6/22 
 # | CREATED BY: MATT BOGARD 
 # | PROJECT FILE:            
 # *----------------------------------------------------------------
 # | PURPOSE: explore using e-values for sensitifity to confounding in the context of OLS 
 # *----------------------------------------------------------------

 # references:

 # Mathur, M. B., Ding, P., Riddell, C. A., & VanderWeele, T. J. (2018). Web Site and R Package for Computing E-values. 
 # Epidemiology (Cambridge, Mass.), 29(5), e45–e47. https://doi.org/10.1097/EDE.0000000000000864

 # https://cran.r-project.org/web/packages/EValue/vignettes/unmeasured-confounding.html
 # https://cran.r-project.org/web/packages/EValue/EValue.pdf 
 # https://cdn1.sph.harvard.edu/wp-content/uploads/sites/603/2020/05/Evalue_Stata_Article.pdf 
 # https://biostat.duke.edu/sites/biostat.duke.edu/files/DukeBiostat2018.pdf
 # https://www.degruyter.com/document/doi/10.1515/jci-2018-0007/html

 options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation
 library(dplyr)
 library(EValue)

 #
 # example from: https://cran.r-project.org/web/packages/EValue/vignettes/unmeasured-confounding.html
 #

 # background
 # Hammond and Horn estimated that cigarette smoking increased the risk of lung cancer by more than 10-fold. 
 # Fisher proposed that genetic confounding may explain all of the apparent association.
 # We can calculate the magnitude of confounding that would be necessary to fully explain 
 # the estimated risk ratio of 10.73 (95% CI 8.02, 14.36):

 evalues.RR(est = 10.73, lo = 8.02, hi = 14.36)

 # The E-value of 20.95 tells us that a confounder, or set of confounders, such as that proposed by Fisher, 
 # would have to be associated with a 20-fold increase in the risk of lung cancer, and must be 20 times more 
 # prevalent in smokers than non-smokers, to explain the observed risk ratio. If the strength of one of these 
 # relationships were weaker, the other would have to be stronger for the causal effect of smoking on lung 
 # cancer to be truly null

 #
 # example OLS from: https://cran.r-project.org/web/packages/EValue/EValue.pdf 
 #

 # note: 

 # This function is for linear regression with a continuous exposure and outcome. 
 # Regarding the con- tinuous exposure, the choice of delta defines essentially a 
 # dichotomization in the exposure between hypothetical groups of subjects with 
 # exposures equal to an arbitrary value c versus to another hypo- thetical group
 # with exposures equal to c + delta. Regarding the continuous outcome, the function 
 # uses the effect-size conversions in Chinn (2000) and VanderWeele (2017) to approximately 
 # con- vert the mean difference between these exposure "groups" to the odds ratio that 
 # would arise from dichotomizing the continuous outcome.

 # For example, if resulting E-value is 2, this means that unmeasured confounder(s)
 # would need to double the probability of a subject’s having exposure equal to c + delta 
 # instead of c, and would also need to double the probability of being high versus low 
 # on the outcome, in which the cutoff for "high" versus "low" is arbitrary subject 
 # to some distributional assumptions (Chinn, 2000). A true standardized mean difference 
 # for linear regression would use sd = SD(Y | X, C), where Y is the outcome, 
 # X is the exposure of interest, and C are any adjusted covariates. 
 # See Examples for how to extract this from lm. A conservative approximation would
 # instead use sd = SD(Y). Regardless, the reported E-value for the confidence 
 # interval treats sd as known, not estimated.

 data ("lead") # load data from package
 head(lead)

 # first standardizing conservatively by SD(Y) data(lead)
 summary(lm(age ~ income, data = lead))
 ols = lm(age ~ income, data = lead)

 # for a 1-unit increase in income 
 evalues.OLS(est = ols$coefficients[2], se = summary(ols)$coefficients['income', 'Std. Error'], sd = sd(lead$age))

 # for a 0.5-unit increase in income 
 evalues.OLS(est = ols$coefficients[2], se = summary(ols)$coefficients['income', 'Std. Error'], sd = sd(lead$age),delta = 0.5)

 # now use residual SD to avoid conservatism
 evalues.OLS(est = ols$coefficients[2],
            se = summary(ols)$coefficients['income', 'Std. Error'], sd = summary(ols)$sigma)


 #-----------------------------------
 # example with lalond data
 #----------------------------------

 library(MatchIt)

 data("lalonde") # load lalonde data

 head(lalonde)

 # first standardizing conservatively by SD(Y) data(lead)
 summary(lm(re78 ~ treat, data = lalonde))
 ols = lm(re78 ~ treat, data = lalonde)

 # for a 1-unit change in 'treat' (assuming this is just the contrast between treatment and control for a binary treatment indicator)
 evalues.OLS(est = ols$coefficients[2], se = summary(ols)$coefficients['treat', 'Std. Error'], sd = sd(lalonde$re78))

 # OUTPUT
 #          point  lower upper
 # RR       0.9256 0.7914 1.082
 # E-values 1.3752     NA 1.000

 # Questions and Interpretation:

 # ? RR = .9256 ~ converson of treatment effect from OLS (-635) to a RR per documentation

 # E-value: with an observed RR = .9256, an unmeasured confounder assocaited with outcome (re78) and exposure (treat) would
 # have to have a RR = 1.37 in order to explain away the estimate, but weaker confounding could not.

 # ? given the relatively small RR (compared to the observed RR) you might conclude that confounding could plausibly explain
 # away this relationship ? 

 #
 # e-values with controls
 #

 # standardizing conservatively by SD(Y) 
 summary(lm(re78 ~  treat + age + educ + race + married + nodegree + re74 + re75, data = lalonde))
 ols = lm(re78 ~ treat + age + educ + race + married + nodegree + re74 + re75, data = lalonde)

 # for a 1-unit change in 'treat' (assuming this is just the contrast between treatment and control for a binary treatment indicator)
 evalues.OLS(est = ols$coefficients[2], se = summary(ols)$coefficients['treat', 'Std. Error'], sd = sd(lalonde$re78))
	# *-----------------------------------------------------------------
	# \| PROGRAM NAME: ex EValues.R
	# \| DATE: 1/6/22
	# \| CREATED BY: MATT BOGARD
	# \| PROJECT FILE:
	# *----------------------------------------------------------------
	# \| PURPOSE: explore using e-values for sensitifity to confounding in the context of OLS
	# *----------------------------------------------------------------

	# references:

	# Mathur, M. B., Ding, P., Riddell, C. A., & VanderWeele, T. J. (2018). Web Site and R Package for Computing E-values.
	# Epidemiology (Cambridge, Mass.), 29(5), e45–e47. https://doi.org/10.1097/EDE.0000000000000864

	# https://cran.r-project.org/web/packages/EValue/vignettes/unmeasured-confounding.html
	# https://cran.r-project.org/web/packages/EValue/EValue.pdf
	# https://cdn1.sph.harvard.edu/wp-content/uploads/sites/603/2020/05/Evalue_Stata_Article.pdf
	# https://biostat.duke.edu/sites/biostat.duke.edu/files/DukeBiostat2018.pdf
	# https://www.degruyter.com/document/doi/10.1515/jci-2018-0007/html

	options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation
	library(dplyr)
	library(EValue)

	#
	# example from: https://cran.r-project.org/web/packages/EValue/vignettes/unmeasured-confounding.html
	#

	# background
	# Hammond and Horn estimated that cigarette smoking increased the risk of lung cancer by more than 10-fold.
	# Fisher proposed that genetic confounding may explain all of the apparent association.
	# We can calculate the magnitude of confounding that would be necessary to fully explain
	# the estimated risk ratio of 10.73 (95% CI 8.02, 14.36):

	evalues.RR(est = 10.73, lo = 8.02, hi = 14.36)

	# The E-value of 20.95 tells us that a confounder, or set of confounders, such as that proposed by Fisher,
	# would have to be associated with a 20-fold increase in the risk of lung cancer, and must be 20 times more
	# prevalent in smokers than non-smokers, to explain the observed risk ratio. If the strength of one of these
	# relationships were weaker, the other would have to be stronger for the causal effect of smoking on lung
	# cancer to be truly null

	#
	# example OLS from: https://cran.r-project.org/web/packages/EValue/EValue.pdf
	#

	# note:

	# This function is for linear regression with a continuous exposure and outcome.
	# Regarding the con- tinuous exposure, the choice of delta defines essentially a
	# dichotomization in the exposure between hypothetical groups of subjects with
	# exposures equal to an arbitrary value c versus to another hypo- thetical group
	# with exposures equal to c + delta. Regarding the continuous outcome, the function
	# uses the effect-size conversions in Chinn (2000) and VanderWeele (2017) to approximately
	# con- vert the mean difference between these exposure "groups" to the odds ratio that
	# would arise from dichotomizing the continuous outcome.

	# For example, if resulting E-value is 2, this means that unmeasured confounder(s)
	# would need to double the probability of a subject’s having exposure equal to c + delta
	# instead of c, and would also need to double the probability of being high versus low
	# on the outcome, in which the cutoff for "high" versus "low" is arbitrary subject
	# to some distributional assumptions (Chinn, 2000). A true standardized mean difference
	# for linear regression would use sd = SD(Y \| X, C), where Y is the outcome,
	# X is the exposure of interest, and C are any adjusted covariates.
	# See Examples for how to extract this from lm. A conservative approximation would
	# instead use sd = SD(Y). Regardless, the reported E-value for the confidence
	# interval treats sd as known, not estimated.

	data ("lead") # load data from package
	head(lead)

	# first standardizing conservatively by SD(Y) data(lead)
	summary(lm(age ~ income, data = lead))
	ols = lm(age ~ income, data = lead)

	# for a 1-unit increase in income
	evalues.OLS(est = ols$coefficients[2], se = summary(ols)$coefficients['income', 'Std. Error'], sd = sd(lead$age))

	# for a 0.5-unit increase in income
	evalues.OLS(est = ols$coefficients[2], se = summary(ols)$coefficients['income', 'Std. Error'], sd = sd(lead$age),delta = 0.5)

	# now use residual SD to avoid conservatism
	evalues.OLS(est = ols$coefficients[2],
	se = summary(ols)$coefficients['income', 'Std. Error'], sd = summary(ols)$sigma)


	#-----------------------------------
	# example with lalond data
	#----------------------------------

	library(MatchIt)

	data("lalonde") # load lalonde data

	head(lalonde)

	# first standardizing conservatively by SD(Y) data(lead)
	summary(lm(re78 ~ treat, data = lalonde))
	ols = lm(re78 ~ treat, data = lalonde)

	# for a 1-unit change in 'treat' (assuming this is just the contrast between treatment and control for a binary treatment indicator)
	evalues.OLS(est = ols$coefficients[2], se = summary(ols)$coefficients['treat', 'Std. Error'], sd = sd(lalonde$re78))

	# OUTPUT
	# point lower upper
	# RR 0.9256 0.7914 1.082
	# E-values 1.3752 NA 1.000

	# Questions and Interpretation:

	# ? RR = .9256 ~ converson of treatment effect from OLS (-635) to a RR per documentation

	# E-value: with an observed RR = .9256, an unmeasured confounder assocaited with outcome (re78) and exposure (treat) would
	# have to have a RR = 1.37 in order to explain away the estimate, but weaker confounding could not.

	# ? given the relatively small RR (compared to the observed RR) you might conclude that confounding could plausibly explain
	# away this relationship ?

	#
	# e-values with controls
	#

	# standardizing conservatively by SD(Y)
	summary(lm(re78 ~ treat + age + educ + race + married + nodegree + re74 + re75, data = lalonde))
	ols = lm(re78 ~ treat + age + educ + race + married + nodegree + re74 + re75, data = lalonde)

	# for a 1-unit change in 'treat' (assuming this is just the contrast between treatment and control for a binary treatment indicator)
	evalues.OLS(est = ols$coefficients[2], se = summary(ols)$coefficients['treat', 'Std. Error'], sd = sd(lalonde$re78))