# ajdamico · November 10, 2010 20:52
# diff --git a/concentration of health expenditures.R b/concentration of health expenditures.R
 #warning: excludes the institutionalized population!

 #install survey and foreign packages
 #-- only run these lines of code the first time

 #install.packages("foreign")
 #install.packages("survey")

 # MEPS consolidated file numbers, one per survey year
 # (the first entry pairs with 1996, the last with 2007)
 consolidated_files <- c(
   12, 20, 28, 38, 50, 60,
   70, 79, 89, 97, 105, 113
 )
 # two-digit survey years, in the same order as the file numbers above
 meps_years <- sprintf("%02d", c(96:99, 0:7))

 # ---- choose the year of MEPS data to use ----------------------------
 # m indexes both vectors above: m <- 1 is 1996, m <- 12 is 2007
 m <- 12
 # ---------------------------------------------------------------------

 # open an empty plotting frame: x runs over 0-100 and y over 0-1.
 # type = "n" sets up the coordinates without drawing any points
 plot(
   c(0, 100), c(0, 1),
   type = "n",
   axes = FALSE,
   xlab = "Percent of Population",
   ylab = "Percent of Spending"
 )

 # x axis: a label every 10 percent, no tick marks (tck = 0)
 axis(1, at = seq(0, 100, by = 10), labels = seq(0, 100, by = 10),
   lty = 1, tck = 0, cex.axis = .7)

 # y axis: quarter steps shown as percentages. ylim is not passed here because
 # axis() does not take it and only warns that it is not a graphical parameter
 axis(2, at = c(0, .25, .5, .75, 1),
   labels = c("0%", "25%", "50%", "75%", "100%"), cex.axis = .7)

 # title, placed three margin lines above the plot region
 title("Concentration of Health Care Spending in the U.S. Population",
   cex.main = 1, line = 3)


 #load survey and foreign packages
 library(foreign)
 library(survey)


 # web address of the zipped consolidated file for the chosen year
 meps_zipped_file <- paste0(
   "http://www.meps.ahrq.gov/mepsweb/data_files/pufs/h",
   consolidated_files[m],
   "ssp.zip"
 )

 # weight column: WTDPERyy for years 96-98, PERWTyyF for every other year
 if (meps_years[m] %in% c("96", "97", "98")) {
   weight <- paste0("WTDPER", meps_years[m])
 } else {
   weight <- paste0("PERWT", meps_years[m], "F")
 }

 # psu and strata columns carry a year suffix only for the first six
 # files (m <= 6); later files use the bare names
 if (m <= 6) {
   psu <- paste0("VARPSU", meps_years[m])
   strata <- paste0("VARSTR", meps_years[m])
 } else {
   psu <- "VARPSU"
   strata <- "VARSTR"
 }

 # total expenditure column for the chosen year
 totexp <- paste0("TOTEXP", meps_years[m])

 # temporary location for the downloaded zip archive
 meps_temp_file <- tempfile()

 # download the archive; mode = "wb" requests a binary transfer so the zip
 # is not altered by text-mode handling on Windows
 download.file(meps_zipped_file, meps_temp_file, mode = "wb")

 # extract into the session's temporary directory rather than the working
 # directory, so nothing is left behind where the script was run from
 meps_extracted_file <- unzip(meps_temp_file, exdir = tempdir())

 # read the extracted SAS transport file into R
 meps_unzipped_file <- read.xport(meps_extracted_file)

 # the data are in memory now, so remove the downloaded and extracted files
 unlink(c(meps_temp_file, meps_extracted_file))

 # build the complex-survey design object from the psu, strata and weight
 # columns chosen above; get() inside each formula resolves the column whose
 # name is stored in the corresponding character variable
 meps_analysis_file <- svydesign(
   id = ~ get(psu),
   strata = ~ get(strata),
   weights = ~ get(weight),
   data = meps_unzipped_file,
   nest = TRUE
 )

 # cutpoints of total expenditure at the 1st through 100th percentile
 qtiles <- as.vector(
   svyquantile(~ get(totexp), meps_analysis_file, seq(.01, 1, by = .01))
 )

 # give every record the index of the highest cutpoint its spending strictly
 # exceeds (0 when it exceeds none). findInterval() with left.open = TRUE
 # counts the cutpoints below each value in one vectorized pass, so no loop
 # over the 100 cutpoints and no eval(parse()) is needed. it relies on qtiles
 # being non-decreasing, which holds for quantiles at increasing probabilities.
 # as.numeric() stores QT as a double rather than an integer.
 meps_unzipped_file$QT <- as.numeric(
   findInterval(meps_unzipped_file[[totexp]], qtiles, left.open = TRUE)
 )

 # rebuild the survey object so that it carries the new QT column
 meps_analysis_file <- svydesign(
   id = ~ get(psu),
   strata = ~ get(strata),
   weights = ~ get(weight),
   data = meps_unzipped_file,
   nest = TRUE
 )

 # total amount spent on healthcare by non-institutionalized civilian
 # americans in the chosen year
 full_year_spending <- svytotal(~ get(totexp), meps_analysis_file)

 # total amount spent on healthcare within each QT group
 qtile_totals <- svyby(~ get(totexp), ~QT, meps_analysis_file, svytotal)

 # share of total spending per percentile bucket. the bucket with QT = k is
 # stored at position k + 1, so buckets that no record falls into (usually the
 # lowest ones, where spending is zero) stay at 0 wherever they occur, and the
 # result does not depend on the row order of qtile_totals.
 # column 1 of qtile_totals is QT, column 2 is the group total.
 qtile_shares <- numeric(100)
 qtile_shares[as.numeric(as.character(qtile_totals[, 1])) + 1] <-
   qtile_totals[, 2] / full_year_spending

 # running total of the shares: position n holds the share of spending
 # accounted for by the lowest n percent, growing towards 100%
 qtile_sums <- cumsum(qtile_shares)



 # annotate selected percentiles with their cumulative spending share;
 # text() is vectorized, so all six labels are drawn in one call
 label_at <- c(50, 80, 85, 90, 95, 99)
 text(
   label_at,
   qtile_sums[label_at],
   paste0(round(qtile_sums[label_at] * 100, 1), "%"),
   pos = 3
 )

 # draw the concentration curve through the first 99 percentiles
 curve_x <- 1:99
 lines(curve_x, qtile_sums[curve_x])

 # spell out how to read the midpoint and the top of the curve
 bottom_half_pct <- round(qtile_sums[50] * 100, 1)
 below_top_pct <- round(qtile_sums[99] * 100, 1)
 top_one_pct <- round(100 - qtile_sums[99] * 100, 1)
 interpretation <- paste0(
   "Interpretations:\nThe lowest half are responsible for ",
   bottom_half_pct, "% of all health care spending.\n",
   "The highest 1% are responsible for 100% - ",
   below_top_pct, "% = ", top_one_pct,
   "% of all health care spending."
 )
 text(0, .95, interpretation, pos = 4, cex = .75)
	#warning: excludes the institutionalized population!

	#install survey and foreign packages
	#-- only run these lines of code the first time

	#install.packages("foreign")
	#install.packages("survey")

	# MEPS consolidated file numbers, one per survey year
	# (the first entry pairs with 1996, the last with 2007)
	consolidated_files <- c(
	  12, 20, 28, 38, 50, 60,
	  70, 79, 89, 97, 105, 113
	)
	# two-digit survey years, in the same order as the file numbers above
	meps_years <- sprintf("%02d", c(96:99, 0:7))

	# ---- choose the year of MEPS data to use ----------------------------
	# m indexes both vectors above: m <- 1 is 1996, m <- 12 is 2007
	m <- 12
	# ---------------------------------------------------------------------

	# open an empty plotting frame: x runs over 0-100 and y over 0-1.
	# type = "n" sets up the coordinates without drawing any points
	plot(
	  c(0, 100), c(0, 1),
	  type = "n",
	  axes = FALSE,
	  xlab = "Percent of Population",
	  ylab = "Percent of Spending"
	)

	# x axis: a label every 10 percent, no tick marks (tck = 0)
	axis(1, at = seq(0, 100, by = 10), labels = seq(0, 100, by = 10),
	  lty = 1, tck = 0, cex.axis = .7)

	# y axis: quarter steps shown as percentages. ylim is not passed here because
	# axis() does not take it and only warns that it is not a graphical parameter
	axis(2, at = c(0, .25, .5, .75, 1),
	  labels = c("0%", "25%", "50%", "75%", "100%"), cex.axis = .7)

	# title, placed three margin lines above the plot region
	title("Concentration of Health Care Spending in the U.S. Population",
	  cex.main = 1, line = 3)


	#load survey and foreign packages
	library(foreign)
	library(survey)


	# web address of the zipped consolidated file for the chosen year
	meps_zipped_file <- paste0(
	  "http://www.meps.ahrq.gov/mepsweb/data_files/pufs/h",
	  consolidated_files[m],
	  "ssp.zip"
	)

	# weight column: WTDPERyy for years 96-98, PERWTyyF for every other year
	if (meps_years[m] %in% c("96", "97", "98")) {
	  weight <- paste0("WTDPER", meps_years[m])
	} else {
	  weight <- paste0("PERWT", meps_years[m], "F")
	}

	# psu and strata columns carry a year suffix only for the first six
	# files (m <= 6); later files use the bare names
	if (m <= 6) {
	  psu <- paste0("VARPSU", meps_years[m])
	  strata <- paste0("VARSTR", meps_years[m])
	} else {
	  psu <- "VARPSU"
	  strata <- "VARSTR"
	}

	# total expenditure column for the chosen year
	totexp <- paste0("TOTEXP", meps_years[m])

	# temporary location for the downloaded zip archive
	meps_temp_file <- tempfile()

	# download the archive; mode = "wb" requests a binary transfer so the zip
	# is not altered by text-mode handling on Windows
	download.file(meps_zipped_file, meps_temp_file, mode = "wb")

	# extract into the session's temporary directory rather than the working
	# directory, so nothing is left behind where the script was run from
	meps_extracted_file <- unzip(meps_temp_file, exdir = tempdir())

	# read the extracted SAS transport file into R
	meps_unzipped_file <- read.xport(meps_extracted_file)

	# the data are in memory now, so remove the downloaded and extracted files
	unlink(c(meps_temp_file, meps_extracted_file))

	# build the complex-survey design object from the psu, strata and weight
	# columns chosen above; get() inside each formula resolves the column whose
	# name is stored in the corresponding character variable
	meps_analysis_file <- svydesign(
	  id = ~ get(psu),
	  strata = ~ get(strata),
	  weights = ~ get(weight),
	  data = meps_unzipped_file,
	  nest = TRUE
	)

	# cutpoints of total expenditure at the 1st through 100th percentile
	qtiles <- as.vector(
	  svyquantile(~ get(totexp), meps_analysis_file, seq(.01, 1, by = .01))
	)

	# give every record the index of the highest cutpoint its spending strictly
	# exceeds (0 when it exceeds none). findInterval() with left.open = TRUE
	# counts the cutpoints below each value in one vectorized pass, so no loop
	# over the 100 cutpoints and no eval(parse()) is needed. it relies on qtiles
	# being non-decreasing, which holds for quantiles at increasing probabilities.
	# as.numeric() stores QT as a double rather than an integer.
	meps_unzipped_file$QT <- as.numeric(
	  findInterval(meps_unzipped_file[[totexp]], qtiles, left.open = TRUE)
	)

	# rebuild the survey object so that it carries the new QT column
	meps_analysis_file <- svydesign(
	  id = ~ get(psu),
	  strata = ~ get(strata),
	  weights = ~ get(weight),
	  data = meps_unzipped_file,
	  nest = TRUE
	)

	# total amount spent on healthcare by non-institutionalized civilian
	# americans in the chosen year
	full_year_spending <- svytotal(~ get(totexp), meps_analysis_file)

	# total amount spent on healthcare within each QT group
	qtile_totals <- svyby(~ get(totexp), ~QT, meps_analysis_file, svytotal)

	# share of total spending per percentile bucket. the bucket with QT = k is
	# stored at position k + 1, so buckets that no record falls into (usually the
	# lowest ones, where spending is zero) stay at 0 wherever they occur, and the
	# result does not depend on the row order of qtile_totals.
	# column 1 of qtile_totals is QT, column 2 is the group total.
	qtile_shares <- numeric(100)
	qtile_shares[as.numeric(as.character(qtile_totals[, 1])) + 1] <-
	  qtile_totals[, 2] / full_year_spending

	# running total of the shares: position n holds the share of spending
	# accounted for by the lowest n percent, growing towards 100%
	qtile_sums <- cumsum(qtile_shares)



	# annotate selected percentiles with their cumulative spending share
	for (n in c(50, 80, 85, 90, 95, 99)) {
	  text(n, qtile_sums[n], paste0(round(qtile_sums[n] * 100, 1), "%"), pos = 3)
	}

	# draw the actual concentration curve through the first 99 percentiles
	lines(1:99, qtile_sums[1:99])

	# write out the interpretation of the midpoint and of the top 1%.
	# the top 1% share is 100 minus the cumulative share at the 99th percentile;
	# both figures need qtile_sums[99] * 100 (the multiplication operator must
	# be written out, otherwise the line does not parse)
	text(
	  0, .95,
	  paste0(
	    "Interpretations:\nThe lowest half are responsible for ",
	    round(qtile_sums[50] * 100, 1), "% of all health care spending.\n",
	    "The highest 1% are responsible for 100% - ",
	    round(qtile_sums[99] * 100, 1), "% = ", round(100 - qtile_sums[99] * 100, 1),
	    "% of all health care spending."
	  ),
	  pos = 4, cex = .75
	)