BioSciEconomist · April 30, 2019 23:01
diff --git a/simulate poisson.R b/simulate poisson.R
 #  ------------------------------------------------------------------
 #  |PROGRAM NAME: simulate poisson.R
 #  |DATE: 4/29/18
 #  |CREATED BY: MATT BOGARD 
 #  |PROJECT FILE: /Users/amandabogard/Google Drive/R Training
 #  |----------------------------------------------------------------
 #  | PURPOSE: simulate data for poisson regression
 #  |----------------------------------------------------------------

 # this can be modified to generate data for any glm model

 #-----------------------------------
 # data simulation
 #----------------------------------

 set.seed(123)

 Z <- list() # this will be our treatment assignment indicator 
 y <- list() # this will be our outcome

 # create 100 treatment cases
 for(i in 1:500) {
  Z[i] = 1
  y[i] = rpois(n = 1, lambda = .5)
 }

 # create 100 controls
 for(i in 501:1000) {
  Z[i] = 0
  y[i] = rpois(n = 1, lambda = 1)
 }

 # unlist our data and create data frame
 y <- as.numeric(as.character(unlist(y)))
 Z <- as.numeric(as.character(unlist(Z)))
 df <- data.frame(y,Z) 
 df$flag <- ifelse(y > 0,1,0) # flag postive cases

 #----------------------------------
 # analysis
 #----------------------------------

 # cross tabs
 df%>% 
  group_by(Z)%>%
  summarize(Yavg = mean(y)) 
 # control value (Z=0): 1 treatment value (Z=1): .47

 # explore prob y > 0
 df%>% 
  group_by(Z)%>%
  summarize(Flagavg = mean(flag)) 

 # poisson model estimate
 glm(y~Z, family = "poisson", data = df) # fit poisson model

 exp(-.755) # get IRR ~ .47

 # this reflects the ratio of values for treatment and control counts we saw in our crosstabs
 .47/1 # .47

 # logit model estimate
 glm(flag~Z, family = "binomial", data = df) # fit poisson model


 #---------------------------------------
 # check our assumptions about this data
 #---------------------------------------

 y <- seq(1,10000,by=1) # create an ID
 x <- rpois(n = 10000, lambda = 2) #  random poisson with average count = 2
 df <- data.frame(y,x) # combine into df
 df$flag <- ifelse(df$x > 0,1,0) # flag postive cases

 summary(df$flag) # p(x>0) ~ .866
 summary(df$x) # mean(count) ~ 2
 var(df$x) # var(x) ~ mean(x) does not exhibit overdispersion
 hist(df$x) # histogram
	# ------------------------------------------------------------------
	# \|PROGRAM NAME: simulate poisson.R
	# \|DATE: 4/29/18
	# \|CREATED BY: MATT BOGARD
	# \|PROJECT FILE: /Users/amandabogard/Google Drive/R Training
	# \|----------------------------------------------------------------
	# \| PURPOSE: simulate data for poisson regression
	# \|----------------------------------------------------------------

	# this can be modified to generate data for any glm model

	#-----------------------------------
	# data simulation
	#----------------------------------

	set.seed(123)

	Z <- list() # this will be our treatment assignment indicator
	y <- list() # this will be our outcome

	# create 100 treatment cases
	for(i in 1:500) {
	Z[i] = 1
	y[i] = rpois(n = 1, lambda = .5)
	}

	# create 100 controls
	for(i in 501:1000) {
	Z[i] = 0
	y[i] = rpois(n = 1, lambda = 1)
	}

	# unlist our data and create data frame
	y <- as.numeric(as.character(unlist(y)))
	Z <- as.numeric(as.character(unlist(Z)))
	df <- data.frame(y,Z)
	df$flag <- ifelse(y > 0,1,0) # flag postive cases

	#----------------------------------
	# analysis
	#----------------------------------

	# cross tabs
	df%>%
	group_by(Z)%>%
	summarize(Yavg = mean(y))
	# control value (Z=0): 1 treatment value (Z=1): .47

	# explore prob y > 0
	df%>%
	group_by(Z)%>%
	summarize(Flagavg = mean(flag))

	# poisson model estimate
	glm(y~Z, family = "poisson", data = df) # fit poisson model

	exp(-.755) # get IRR ~ .47

	# this reflects the ratio of values for treatment and control counts we saw in our crosstabs
	.47/1 # .47

	# logit model estimate
	glm(flag~Z, family = "binomial", data = df) # fit poisson model


	#---------------------------------------
	# check our assumptions about this data
	#---------------------------------------

	y <- seq(1,10000,by=1) # create an ID
	x <- rpois(n = 10000, lambda = 2) # random poisson with average count = 2
	df <- data.frame(y,x) # combine into df
	df$flag <- ifelse(df$x > 0,1,0) # flag postive cases

	summary(df$flag) # p(x>0) ~ .866
	summary(df$x) # mean(count) ~ 2
	var(df$x) # var(x) ~ mean(x) does not exhibit overdispersion
	hist(df$x) # histogram
No results found