sebnyberg · May 11, 2017 15:01
diff --git a/3.3.R b/3.3.R
 source('helpers.R')


 # ========================================
 # ==============[ Q 3.3.1 ]===============
 # ========================================

 load('hilda.Rdata')

 # redefine the hospitaldays to be 1 for anything above 1
 hilda$hospdays[(hilda$hospdays >= 1)] = 1

 # count no.hospdays to be the complement of hospdays
 hilda$no.hospdays = 1 - hilda$hospdays

 # define sex as a factor
 hilda$sex <- factor(hilda$sex, labels=c("male", "female"), levels=c(1,2))


 # redefine all answers which were "other" or "not answered" to be NA
 hilda$health[(hilda$health >= 4)] = NA
 hilda$health <- factor(hilda$health, labels=c("good", "bad", "inbetween"))
 levels(hilda$health) <- c("good", "not good", "not good")


 agg <- aggregate(hilda[,c("hospdays","no.hospdays")],by=list(sex=hilda$sex),FUN=sum)
 agg$total <- agg$hospdays + agg$no.hospdays
 agg$props <- agg$hospdays / agg$total


 # it does seem this information can be of help for us
 # lets form a model and see what the wald test tells us
 model.sex <- glm(hilda$hospdays ~ hilda$sex, family=binomial)
 summary(model.sex)

 # clearly significant against the reference category

 # lets form a glm with age and health
 # we already have model.{age|health|age.trans}
 model.new <- glm(hilda$hospdays ~ hilda$age + hilda$health, family=binomial)

 model.health <- glm(hilda$hospdays ~ hilda$health, family=binomial)

 # is this model better than hospdays~health?
 compare_models(model.health, model.new)

 model.current <- model.new

 # Ddiff is much greater than the chi squared value for one degree of freedom
 # we therefore choose the model with both health and age

 age.sq <- hilda$age^2

 # lets see if our previous transformation is worth it
 model.new <- glm(hilda$hospdays ~ hilda$age + age.sq + hilda$health, family=binomial)

 compare_models(model.current, model.new)

 # we have a lower deviance, so we include the transformation..
 model.current <- model.new

 # lets use all three categories from the assignment
 model.new <- glm(hilda$hospdays ~ hilda$age + age.sq + hilda$health + hilda$sex, family=binomial)

 compare_models(model.current, model.new)

 # once again we improved the model.
 model.current <- model.new

 ################### questions

 coeff <- model.new$coefficients

 # male is the reference category for sex, good health for health,
 # thus coeff[1] is age 0 male with good health


 # question 1
 # 70 years old female with not good health
 # age = 70, sex = "female", health = "not good"
 x.female <- c(1, 70, 70^2, 1, 1)
 x.male <- c(1, 70, 70^2, 1, 0)
 ln.odds.f <- coeff %*% x.female
 ln.odds.m <- coeff %*% x.male

 odds.f <- exp(ln.odds.f)
 odds.m <- exp(ln.odds.m)

 or.fm <- odds.f / odds.m
 or.fm

 # The odds of a female spending more than one day in the hospital as compared
 # to a man is only 0.5304792...

 x.m90 <- c(1, 90, 90^2, 0, 0)
 x.m60 <- c(1, 60, 60^2, 1, 0)

 # question 2
 ln.odds.m90 <- coeff %*% x.m90
 ln.odds.m60 <- coeff %*% x.m60

 odds.m90 <- exp(ln.odds.m90)
 odds.m60 <- exp(ln.odds.m60)

 or.m90m60 <- odds.m90 / odds.m60
 or.m90m60

 # The odds of a male age 90 with good health vs age 60 not good health is
 # 1.082004
	source('helpers.R')


	# ========================================
	# ==============[ Q 3.3.1 ]===============
	# ========================================

	load('hilda.Rdata')

	# redefine the hospitaldays to be 1 for anything above 1
	hilda$hospdays[(hilda$hospdays >= 1)] = 1

	# count no.hospdays to be the complement of hospdays
	hilda$no.hospdays = 1 - hilda$hospdays

	# define sex as a factor
	hilda$sex <- factor(hilda$sex, labels=c("male", "female"), levels=c(1,2))


	# redefine all answers which were "other" or "not answered" to be NA
	hilda$health[(hilda$health >= 4)] = NA
	hilda$health <- factor(hilda$health, labels=c("good", "bad", "inbetween"))
	levels(hilda$health) <- c("good", "not good", "not good")


	agg <- aggregate(hilda[,c("hospdays","no.hospdays")],by=list(sex=hilda$sex),FUN=sum)
	agg$total <- agg$hospdays + agg$no.hospdays
	agg$props <- agg$hospdays / agg$total


	# it does seem this information can be of help for us
	# lets form a model and see what the wald test tells us
	model.sex <- glm(hilda$hospdays ~ hilda$sex, family=binomial)
	summary(model.sex)

	# clearly significant against the reference category

	# lets form a glm with age and health
	# we already have model.{age\|health\|age.trans}
	model.new <- glm(hilda$hospdays ~ hilda$age + hilda$health, family=binomial)

	model.health <- glm(hilda$hospdays ~ hilda$health, family=binomial)

	# is this model better than hospdays~health?
	compare_models(model.health, model.new)

	model.current <- model.new

	# Ddiff is much greater than the chi squared value for one degree of freedom
	# we therefore choose the model with both health and age

	age.sq <- hilda$age^2

	# lets see if our previous transformation is worth it
	model.new <- glm(hilda$hospdays ~ hilda$age + age.sq + hilda$health, family=binomial)

	compare_models(model.current, model.new)

	# we have a lower deviance, so we include the transformation..
	model.current <- model.new

	# lets use all three categories from the assignment
	model.new <- glm(hilda$hospdays ~ hilda$age + age.sq + hilda$health + hilda$sex, family=binomial)

	compare_models(model.current, model.new)

	# once again we improved the model.
	model.current <- model.new

	################### questions

	coeff <- model.new$coefficients

	# male is the reference category for sex, good health for health,
	# thus coeff[1] is age 0 male with good health


	# question 1
	# 70 years old female with not good health
	# age = 70, sex = "female", health = "not good"
	x.female <- c(1, 70, 70^2, 1, 1)
	x.male <- c(1, 70, 70^2, 1, 0)
	ln.odds.f <- coeff %*% x.female
	ln.odds.m <- coeff %*% x.male

	odds.f <- exp(ln.odds.f)
	odds.m <- exp(ln.odds.m)

	or.fm <- odds.f / odds.m
	or.fm

	# The odds of a female spending more than one day in the hospital as compared
	# to a man is only 0.5304792...

	x.m90 <- c(1, 90, 90^2, 0, 0)
	x.m60 <- c(1, 60, 60^2, 1, 0)

	# question 2
	ln.odds.m90 <- coeff %*% x.m90
	ln.odds.m60 <- coeff %*% x.m60

	odds.m90 <- exp(ln.odds.m90)
	odds.m60 <- exp(ln.odds.m60)

	or.m90m60 <- odds.m90 / odds.m60
	or.m90m60

	# The odds of a male age 90 with good health vs age 60 not good health is
	# 1.082004