Created
May 11, 2017 15:01
-
-
Save sebnyberg/c623c2d5412831bb199f47894dc5eedf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source('helpers.R') | |
# ======================================== | |
# ==============[ Q 3.3.1 ]=============== | |
# ======================================== | |
load('hilda.Rdata') | |
# redefine the hospitaldays to be 1 for anything above 1 | |
hilda$hospdays[(hilda$hospdays >= 1)] = 1 | |
# count no.hospdays to be the complement of hospdays | |
hilda$no.hospdays = 1 - hilda$hospdays | |
# define sex as a factor | |
hilda$sex <- factor(hilda$sex, labels=c("male", "female"), levels=c(1,2)) | |
# redefine all answers which were "other" or "not answered" to be NA | |
hilda$health[(hilda$health >= 4)] = NA | |
hilda$health <- factor(hilda$health, labels=c("good", "bad", "inbetween")) | |
levels(hilda$health) <- c("good", "not good", "not good") | |
agg <- aggregate(hilda[,c("hospdays","no.hospdays")],by=list(sex=hilda$sex),FUN=sum) | |
agg$total <- agg$hospdays + agg$no.hospdays | |
agg$props <- agg$hospdays / agg$total | |
# it does seem this information can be of help for us | |
# lets form a model and see what the wald test tells us | |
model.sex <- glm(hilda$hospdays ~ hilda$sex, family=binomial) | |
summary(model.sex) | |
# clearly significant against the reference category | |
# lets form a glm with age and health | |
# we already have model.{age|health|age.trans} | |
model.new <- glm(hilda$hospdays ~ hilda$age + hilda$health, family=binomial) | |
model.health <- glm(hilda$hospdays ~ hilda$health, family=binomial) | |
# is this model better than hospdays~health? | |
compare_models(model.health, model.new) | |
model.current <- model.new | |
# Ddiff is much greater than the chi squared value for one degree of freedom | |
# we therefore choose the model with both health and age | |
age.sq <- hilda$age^2 | |
# lets see if our previous transformation is worth it | |
model.new <- glm(hilda$hospdays ~ hilda$age + age.sq + hilda$health, family=binomial) | |
compare_models(model.current, model.new) | |
# we have a lower deviance, so we include the transformation.. | |
model.current <- model.new | |
# lets use all three categories from the assignment | |
model.new <- glm(hilda$hospdays ~ hilda$age + age.sq + hilda$health + hilda$sex, family=binomial) | |
compare_models(model.current, model.new) | |
# once again we improved the model. | |
model.current <- model.new | |
################### questions | |
coeff <- model.new$coefficients | |
# male is the reference category for sex, good health for health, | |
# thus coeff[1] is age 0 male with good health | |
# question 1 | |
# 70 years old female with not good health | |
# age = 70, sex = "female", health = "not good" | |
x.female <- c(1, 70, 70^2, 1, 1) | |
x.male <- c(1, 70, 70^2, 1, 0) | |
ln.odds.f <- coeff %*% x.female | |
ln.odds.m <- coeff %*% x.male | |
odds.f <- exp(ln.odds.f) | |
odds.m <- exp(ln.odds.m) | |
or.fm <- odds.f / odds.m | |
or.fm | |
# The odds of a female spending more than one day in the hospital as compared | |
# to a man is only 0.5304792... | |
x.m90 <- c(1, 90, 90^2, 0, 0) | |
x.m60 <- c(1, 60, 60^2, 1, 0) | |
# question 2 | |
ln.odds.m90 <- coeff %*% x.m90 | |
ln.odds.m60 <- coeff %*% x.m60 | |
odds.m90 <- exp(ln.odds.m90) | |
odds.m60 <- exp(ln.odds.m60) | |
or.m90m60 <- odds.m90 / odds.m60 | |
or.m90m60 | |
# The odds of a male age 90 with good health vs age 60 not good health is | |
# 1.082004 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment