Created
March 3, 2021 23:41
-
-
Save BioSciEconomist/02fdf7edb4420d8fc4e5fac81d3c5733 to your computer and use it in GitHub Desktop.
Simulate claims cost data for treatment and control experiment scenario
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# *----------------------------------------------------------------- | |
# | PROGRAM NAME: simulate claims cost data.R | |
# | DATE: 3/2/21 | |
# | CREATED BY: MATT BOGARD | |
# | PROJECT FILE: | |
# *---------------------------------------------------------------- | |
# | PURPOSE: simualte example treatment and control claims cost data | |
# *---------------------------------------------------------------- | |
# https://data.library.virginia.edu/assessing-type-s-and-type-m-errors/ | |
library(MASS) | |
library(dplyr) | |
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation | |
# | |
# treatent group | |
# | |
mu <- 4000 # mean | |
sd <- 12000 # standard deviation | |
cv <- sd/mu # CV | |
kappa <- 1/(cv^2) # shape parameter | |
theta <- (sd*sd)/mu # this is theta or scale parameter | |
# simulate costs based on parameter assumptions above | |
set.seed(1234567) | |
cost <- rgamma(100000, shape = kappa, scale = theta) | |
# check descriptive results | |
summary(cost) | |
sd(cost) | |
hist(cost, breaks = 50) | |
# simulate ID and treatment indicator | |
ID <- seq(1,100000) | |
D <- rep(1,100000) | |
trt <- data.frame(ID,cost,D) # combine metrics | |
# | |
# control group | |
# | |
mu <- 6000 # mean | |
sd <- 18000 # standard deviation | |
cv <- sd/mu # CV | |
kappa <- 1/(cv^2) # shape parameter | |
theta <- (sd*sd)/mu # this is theta or scale parameter | |
# simulate costs based on parameter assumptions above | |
set.seed(1234567) | |
cost <- rgamma(100000, shape = kappa, scale = theta) | |
# check descriptive results | |
summary(cost) | |
sd(cost) | |
hist(cost, breaks = 50) | |
# simulate ID and treatment indicator | |
ID <- seq(100001,200000) | |
D <- rep(0,100000) | |
ctrl <- data.frame(ID,cost,D) # combine metrics | |
# | |
# create population data frame | |
# | |
df <- rbind(trt,ctrl) | |
# check descriptives | |
summary(df) | |
df%>% | |
group_by(D) %>% | |
summarize(y = mean(cost), | |
n = n()) | |
hist(df$cost, breaks = 50) | |
summary(lm(cost ~ D, data = df)) # regression on full model | |
summary(glm(cost ~ D, data = df, family = "Gamma"(link = 'log'),epsilon = 1e-8, maxit = 500, trace = FALSE)) # gamma reg |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment