Created
May 11, 2017 15:02
-
-
Save sebnyberg/038697a0f4db56bec13f9cfe86fa6645 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ======================================== | |
# ==============[ Q 3.1.1 ]=============== | |
# ======================================== | |
# load data | |
load('hilda.Rdata') | |
source('helpers.R') | |
# redifine the hospitaldays to be 1 for anything above 1 | |
hilda$hospdays[(hilda$hospdays >= 1)] = 1 | |
hilda$hospdays | |
# redefine all answers which were "other" or "not answered" to be NA | |
hilda$health[(hilda$health >= 4)] = NA | |
hilda$health <- factor(hilda$health, labels=c("good", "bad", "inbetween")) | |
hilda$health | |
# check that all covariates are sigificantly different from zero | |
model.no.ref <- glm(hilda$hospdays ~ hilda$health - 1, family="binomial") | |
# ^^^^^^ this makes categories non-relative to the reference | |
summary(model.no.ref) | |
# they are significantly different from zero | |
# lets try a model with healthgood as a reference category | |
model.ref.good <- glm(hilda$hospdays ~ hilda$health, family="binomial") | |
summary(model.ref.good) | |
# lets check if healthbad is significantly different from health inbetween | |
model.ref.bad <- glm(hilda$hospdays ~ factor(hilda$health, levels=c("bad","inbetween","good")), family="binomial") | |
summary(model.ref.bad) | |
# inbetween is not significantly different from bad (the reference category)... | |
# we should consider using a model which merges the categories. | |
# Lets check the ANOVA for the merged vs split model. | |
# create merged health | |
healthmerged <- hilda$health | |
levels(healthmerged) <- c("good", "not good", "not good") | |
# compare models | |
model.original <- glm(hilda$hospdays ~ hilda$health, family=binomial) | |
model.merged <- glm(hilda$hospdays ~ healthmerged, family=binomial) | |
# anova will use a likelihood ratio test to compare the models | |
anova(model.merged, model.original) | |
qchisq(1-0.05,1) | |
# since 3.444 < 3.841459 we can not reject the null hypothesis (that the two models are the same) | |
# thus, merging the models does not result in a loss or improvement either way | |
model.health <- model.merged | |
# merge the groups, since the Pr(>|z|) was high (0.6) | |
hilda$health <- healthmerged | |
# ======================================== | |
# ==============[ Q 3.1.2 ]=============== | |
# ======================================== | |
# since the model coefficients are w.r.t. the reference category (good) | |
# the odds ratio for people with not good as compared to good is | |
ln.or.notgood <- model.health$coefficients[1] | |
or.notgood <- exp(ln.or.notgood) | |
# the S.E. is the second diagonal element in the covariance matrix | |
# of our model | |
se <- sqrt(diag(summary(model.health)$cov.unscaled)) | |
se.or.notgood <- se[2] | |
# calculate bounds | |
lo <- ln.or.notgood - 1.96 * se.or.notgood | |
hi <- ln.or.notgood + 1.96 * se.or.notgood | |
# calculate confidence interval for | |
ci.or.notgood.lo <- exp(lo) | |
ci.or.notgood.hi <- exp(hi) | |
# ======================================== | |
# ==============[ Q 3.1.3 ]=============== | |
# ======================================== | |
beta0 <- model.health$coefficients[1] | |
beta1 <- model.health$coefficients[2] | |
ln.odds.notgood <- beta0 + beta1 | |
odds.notgood <- exp(ln.odds.notgood) | |
se.notgood <- sqrt(se[2]^2 - se[1]^2) | |
# calculate bounds | |
lo.notgood <- ln.odds.notgood - 1.96 * se.notgood | |
hi.notgood <- ln.odds.notgood + 1.96 * se.notgood | |
# calculate confidence interval for odds of not good | |
ci.notgood.lo <- exp(lo.notgood) | |
ci.notgood.hi <- exp(hi.notgood) | |
# ======================================== | |
# ==============[ Q 3.1.4 ]=============== | |
# ======================================== | |
# probability of having at least one hospital day for people with good health: | |
odds.good <- exp(beta0) | |
prob.good <- odds.good / (1 + odds.good) | |
se.good <- se[1] | |
lo.good <- beta0 - 1.96 * se.good | |
hi.good <- beta0 + 1.96 * se.good | |
ci.prob.good.lo <- exp(lo.good) / (1 + exp(lo.good)) | |
ci.prob.good.hi <- exp(hi.good) / (1 + exp(hi.good)) | |
# for not good health | |
prob.notgood <- odds.notgood / (1 + odds.notgood) | |
ci.prob.notgood.lo <- exp(lo.notgood) / (1 + exp(lo.notgood)) | |
ci.prob.notgood.hi <- exp(hi.notgood) / (1 + exp(hi.notgood)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment