Created
November 10, 2010 20:52
-
-
Save ajdamico/671495 to your computer and use it in GitHub Desktop.
draw a curve showing the concentration of all healthcare expenditures in the united states
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#warning: excludes the institutionalized population! | |
#install survey and foreign packages | |
#-- only run these lines of code the first time | |
#install.packages("foreign") | |
#install.packages("survey") | |
#designate each of the MEPS consolidated files - | |
consolidated_files <- c( 12 , 20 , 28 , 38 , 50 , 60 , 70 , 79 , 89 , 97 , 105 , 113 ) | |
#-- and their respective years | |
meps_years <- c( "96" , "97" , "98" , "99" , "00" , "01" , "02" , "03" , "04" , "05" , "06" , "07" ) | |
#################################################### | |
#choose your year of MEPS data to use! | |
#for 1996, choose m <- 1. for 2007, choose m <- 12. | |
m <- 12 | |
#################################################### | |
#create the plot | |
plot(c(0,100),c(0,1),axes=FALSE,col="white",xlab="Percent of Population",ylab="Percent of Spending") | |
#draw the axes and the title | |
axis(1,seq(0,100,by=10),lty=1,tck=0,cex.axis=.7,labels=seq(0,100,by=10)) | |
axis(2,at=c(0,.25,.5,.75,1),ylim=c(0,1),labels=c("0%","25%","50%","75%","100%"),cex.axis=.7) | |
title("Concentration of Health Care Spending in the U.S. Population",cex.main=1,line=3) | |
#load survey and foreign packages | |
library(foreign) | |
library(survey) | |
#assign the http:// location to download the consolidated file from | |
meps_zipped_file <- | |
paste("http://www.meps.ahrq.gov/mepsweb/data_files/pufs/h",consolidated_files[m],"ssp.zip",sep="") | |
#assign the weight, psu, and strata, depending on the year | |
weight <- ifelse( meps_years[m] %in% c("96","97","98") , paste("WTDPER",meps_years[m],sep="") , paste("PERWT",meps_years[m],"F",sep="") ) | |
psu <- ifelse( m <= 6 , paste("VARPSU",meps_years[m],sep="") , "VARPSU" ) | |
strata <- ifelse( m <= 6 , paste("VARSTR",meps_years[m],sep="") , "VARSTR" ) | |
#assign the total expenditure variable | |
totexp <- paste("TOTEXP",meps_years[m],sep="") | |
#create a temporary file to save the consolidated file to | |
meps_temp_file <- tempfile() | |
#download that consolidated file, save to that temporary file | |
download.file( meps_zipped_file , meps_temp_file ) | |
#unzip and read the consolidated file into R | |
meps_unzipped_file <- read.xport( unzip( meps_temp_file ) ) | |
#immediately create a survey object, using the complex survey sampling variables assigned above | |
meps_analysis_file <- svydesign(id = ~ get(psu) , | |
strata = ~ get(strata) , weights = ~ get(weight) , | |
data = meps_unzipped_file , nest = TRUE) | |
#determine the cutpoints for each of the quantiles, from 0 to 100 | |
qtiles <- as.vector(svyquantile( ~get(totexp) , meps_analysis_file , seq(.01 , 1 , by=.01) ) ) | |
#return to the unzipped file and assign each individual record to the correct quantile | |
meps_unzipped_file$QT <- 0 | |
for (i in 1:100){ | |
meps_unzipped_file <- transform( | |
meps_unzipped_file , QT = ifelse( | |
eval(parse(text=totexp)) > qtiles[i] , i , QT ) ) | |
} | |
#re-create the survey object, this time with the quantiles (QT) variable in place | |
meps_analysis_file <- svydesign(id = ~ get(psu) , | |
strata = ~ get(strata) , weights = ~ get(weight) , | |
data = meps_unzipped_file , nest = TRUE) | |
#calculate the total amount spent on healthcare by non-institutionalized civilian americans in the current year | |
full_year_spending <- svytotal(~get(totexp) , meps_analysis_file ) | |
#calculate the total amount spent on healthcare by each respective quantile | |
qtile_totals <- svyby(~get(totexp) , ~QT , meps_analysis_file , svytotal) | |
#divide each respective quantile total by the full year spending total to determine each quantile's share | |
qtile_shares <- qtile_totals[,2] / full_year_spending | |
#tack on zeroes at the beginning, since the first ten to fifteen quantiles are all zeroes | |
num_zeroes <- 100 - length(unique(meps_unzipped_file$QT)) | |
qtile_shares <- c( rep( 0 , num_zeroes) , qtile_shares) | |
#add each quantile share to the subsequent quantile share, in order to cumulatively grow to 100% of national health spending | |
qtile_sums<-qtile_shares | |
for (i in 2:length(qtile_sums)){ | |
qtile_sums[i] <- qtile_sums[i] + qtile_sums[i-1] | |
} | |
#label certain points | |
for (n in c( 50 , 80 , 85 , 90, 95 , 99 ) ) { | |
text(n , qtile_sums[n] , paste(round(qtile_sums[n]*100,1),"%",sep="") , pos=3) | |
} | |
#draw the actual concentration curve | |
lines(1:99 , qtile_sums[1:99]) | |
#write out the interpretation of the midpoint | |
text(0,.95,paste("Interpretations:\nThe lowest half are responsible for ", | |
round(qtile_sums[50]*100,1),"% of all health care spending.\n", | |
"The highest 1% are responsible for 100% - ", | |
round(qtile_sums[99]*100,1),"% = ",round(100-qtile_sums[99]*100,1), | |
"% of all health care spending.",sep=""),pos=4,cex=.75) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment