Last active
April 30, 2019 23:01
-
-
Save BioSciEconomist/6f912ecb9029c952b3487538c7dc03db to your computer and use it in GitHub Desktop.
simulate basic data for poisson regression
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # ------------------------------------------------------------------ | |
| # |PROGRAM NAME: simulate poisson.R | |
| # |DATE: 4/29/18 | |
| # |CREATED BY: MATT BOGARD | |
| # |PROJECT FILE: /Users/amandabogard/Google Drive/R Training | |
| # |---------------------------------------------------------------- | |
| # | PURPOSE: simulate data for poisson regression | |
| # |---------------------------------------------------------------- | |
| # this can be modified to generate data for any glm model | |
| #----------------------------------- | |
| # data simulation | |
| #---------------------------------- | |
| set.seed(123) | |
| Z <- list() # this will be our treatment assignment indicator | |
| y <- list() # this will be our outcome | |
| # create 100 treatment cases | |
| for(i in 1:500) { | |
| Z[i] = 1 | |
| y[i] = rpois(n = 1, lambda = .5) | |
| } | |
| # create 100 controls | |
| for(i in 501:1000) { | |
| Z[i] = 0 | |
| y[i] = rpois(n = 1, lambda = 1) | |
| } | |
| # unlist our data and create data frame | |
| y <- as.numeric(as.character(unlist(y))) | |
| Z <- as.numeric(as.character(unlist(Z))) | |
| df <- data.frame(y,Z) | |
| df$flag <- ifelse(y > 0,1,0) # flag postive cases | |
| #---------------------------------- | |
| # analysis | |
| #---------------------------------- | |
| # cross tabs | |
| df%>% | |
| group_by(Z)%>% | |
| summarize(Yavg = mean(y)) | |
| # control value (Z=0): 1 treatment value (Z=1): .47 | |
| # explore prob y > 0 | |
| df%>% | |
| group_by(Z)%>% | |
| summarize(Flagavg = mean(flag)) | |
| # poisson model estimate | |
| glm(y~Z, family = "poisson", data = df) # fit poisson model | |
| exp(-.755) # get IRR ~ .47 | |
| # this reflects the ratio of values for treatment and control counts we saw in our crosstabs | |
| .47/1 # .47 | |
| # logit model estimate | |
| glm(flag~Z, family = "binomial", data = df) # fit poisson model | |
| #--------------------------------------- | |
| # check our assumptions about this data | |
| #--------------------------------------- | |
| y <- seq(1,10000,by=1) # create an ID | |
| x <- rpois(n = 10000, lambda = 2) # random poisson with average count = 2 | |
| df <- data.frame(y,x) # combine into df | |
| df$flag <- ifelse(df$x > 0,1,0) # flag postive cases | |
| summary(df$flag) # p(x>0) ~ .866 | |
| summary(df$x) # mean(count) ~ 2 | |
| var(df$x) # var(x) ~ mean(x) does not exhibit overdispersion | |
| hist(df$x) # histogram |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment