Created
February 22, 2016 20:34
-
-
Save ajdamico/0c256ed3a77d77eecfd6 to your computer and use it in GitHub Desktop.
La_rs bughunt — an R script that interleaves repeated symmetric eigendecompositions of a small 3x3 matrix throughout a survey-mapping workflow, in order to reproduce an intermittent crash in R's internal LAPACK `La_rs` routine.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# # # # # # # # # # # # # # # # #
# # set the working directory # #
# # # # # # # # # # # # # # # # #

# setwd( "C:/My Directory/SWMAP/" )

# # # # # # # # # # # # # # # #
# # example survey data set # #
# # # # # # # # # # # # # # # #

# american community survey

# # # # # # # # # # # # # # # # # # # # #
# # different from other maps because # #
# # # # # # # # # # # # # # # # # # # # #

# displays a non-ordinal categorical variable
# crosses the international date line

# # # # # # # # # # # # # # # # # #
# # smallest level of geography # #
# # # # # # # # # # # # # # # # # #

# state, public use microdata areas

# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # asdfree.com blog post for this survey microdata # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # #

# http://www.asdfree.com/search/label/american%20community%20survey%20%28acs%29

# # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # r code repository for setup and analysis examples # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # #

# https://github.com/ajdamico/asdfree/tree/master/American%20Community%20Survey

# # # # # # # # # # # # #
# # value of interest # #
# # # # # # # # # # # # #

# disproportionate shares of veterans of foreign wars (categorical)

# # # # # # #
# # flaws # #
# # # # # # #

# map presents shares that are *disproportionately* higher than the statewide average.
# in absolute numbers, gulf war veterans outnumber other veteran categories in four of the five pumas.

# # # # # # # # # # # # # # # # # # # # #
# # step 1: load the survey microdata # #

# remove the # in order to run this install.packages line only once
# install.packages( c( "MonetDB.R" , "MonetDBLite" ) , repos=c("http://dev.monetdb.org/Assets/R/", "http://cran.rstudio.com/"))

# crash repro for the La_rs bughunt: a small, symmetric 3x3 covariance-like matrix
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
# decompose it through the supported eigen() interface:
# eigen( symmetric = TRUE ) reaches the same internal LAPACK routine (La_rs)
# as the original, non-API .Internal(La_rs(xyz,FALSE)) call, which user code
# should never invoke directly.
eigen( xyz , symmetric = TRUE )
library(downloader)

# crash repro matrix, redefined here so this step runs on its own.
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
# eigen( symmetric = TRUE ) exercises the same internal LAPACK La_rs routine
# as the original non-API .Internal(La_rs()) call.
eigen( xyz , symmetric = TRUE )

# download the 2013 american community survey microdata onto the local disk
# path.to.7z <- "7za"    # # only macintosh and *nix users need this line
single.year.datasets.to.download <- 2013
three.year.datasets.to.download <- NULL
five.year.datasets.to.download <- NULL
source_url( "https://raw.githubusercontent.com/ajdamico/asdfree/master/American%20Community%20Survey/download%20all%20microdata.R" , prompt = FALSE , echo = TRUE )

# repeat the crash repro after the heavy download step
eigen( xyz , symmetric = TRUE )

# # end of step 1 # #
# # # # # # # # # # #

eigen( xyz , symmetric = TRUE )
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # step 2: conduct your analysis of interest at the smallest geography allowed # #

library(survey)
library(MonetDB.R)
library(MonetDBLite)
library(scales)

# crash repro matrix, redefined here so this step runs on its own.
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
# eigen( symmetric = TRUE ) exercises the same internal LAPACK La_rs routine
# as the original non-API .Internal(La_rs()) call.
eigen( xyz , symmetric = TRUE )

# connect to the embedded monetdb database stored under the working directory
dbfolder <- paste0( getwd() , "/MonetDB" )
db <- dbConnect( MonetDBLite() , dbfolder )

eigen( xyz , symmetric = TRUE )

# # # # run your analysis commands # # # #

# subset the design to only alaska (state fips 2) before actually constructing the design
acs.alaska <- dbGetQuery( db , 'select * from acs2013_1yr_m where st = 2' )
# note: this is not allowed for taylor-series linearized designs

eigen( xyz , symmetric = TRUE )

# disconnect from the current monet database
dbDisconnect( db )

eigen( xyz , symmetric = TRUE )

# construct a svrepdesign object using the acs replicate weights:
# 80 replicate weights (pwgtp1-pwgtp80), fay-adjusted scale 4/80,
# mse = TRUE to center variance estimates on the full-sample estimate.
alaska.design <-
  svrepdesign(
    weight = ~pwgtp ,
    repweights = 'pwgtp[1-9]' ,
    scale = 4 / 80 ,
    rscales = rep( 1 , 80 ) ,
    mse = TRUE ,
    data = acs.alaska
  )

eigen( xyz , symmetric = TRUE )
# create a denominator variable indicating any period of service
alaska.design <- update( alaska.design , vet = as.numeric( vps > 0 ) )

# create categorical indicators for era of service
# (vps codes 1-5 = gulf era, 6-8 = vietnam era, 9-15 = other eras)
alaska.design <-
  update(
    alaska.design ,
    gulf = as.numeric( vps %in% 1:5 ) ,
    vietnam = as.numeric( vps %in% 6:8 ) ,
    other = as.numeric( vps %in% 9:15 )
  )

# crash repro matrix, redefined here so this step runs on its own.
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
# eigen( symmetric = TRUE ) exercises the same internal LAPACK La_rs routine
# as the original non-API .Internal(La_rs()) call.
eigen( xyz , symmetric = TRUE )

# statewide era of service shares
sw <- svyratio( ~ gulf + vietnam + other , ~ vet , alaska.design , na.rm = TRUE )

eigen( xyz , symmetric = TRUE )

# puma-specific era of service shares
ps <- svyby( ~ gulf + vietnam + other , denominator = ~ vet , by = ~ puma , alaska.design , svyratio , na.rm = TRUE )

eigen( xyz , symmetric = TRUE )

# find the disproportionate shares:
# subtract the statewide share from each of the five puma-level shares
ds <- ps[ , 2:4 ] - matrix( coef( sw ) , 5 , 3 , byrow = TRUE )

eigen( xyz , symmetric = TRUE )

# so look at this table.
ds

eigen( xyz , symmetric = TRUE )

# pumas 101 and 300 have veterans that disproportionately served during the gulf wars (up to the present)
# pumas 102 and 200 have veterans that disproportionately served during the vietnam war
# puma 400 has veterans that disproportionately served during another era

# hold on to these disproportionate shares and the standard errors of the original ratios.
alaska.pumas <- cbind( ps[ 1 ] , ds , ps[ , 5:7 ] )

eigen( xyz , symmetric = TRUE )

# note that the standard error of the ratio statistic
# and the standard error of the ratio of
# the difference between statewide and puma-level statistics
# are probably not the same. well, i'm sure of it.
# if you're an academic statistician, you might be mad at me
# for making this half-assed calculation right here.
# github makes it easy to patch and edit and update other people's code.
# go for it ;)

# remove those slashvets from the column names
# ( fixed = TRUE because "/vet" is a literal string, not a regex )
names( alaska.pumas ) <- gsub( "/vet" , "" , names( alaska.pumas ) , fixed = TRUE )

eigen( xyz , symmetric = TRUE )

# these are the small area statistics to be mapped
print( alaska.pumas )
# the standard errors are a measure of precision,
# their inverse will serve as the mapping weights

eigen( xyz , symmetric = TRUE )

# make this object easier to type
sas <- alaska.pumas

eigen( xyz , symmetric = TRUE )

# # end of step 2 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # step 3: download and import necessary geographic crosswalks # #

library(downloader)
library(maptools)

# crash repro matrix, redefined here so this step runs on its own.
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
# eigen( symmetric = TRUE ) exercises the same internal LAPACK La_rs routine
# as the original non-API .Internal(La_rs()) call.
eigen( xyz , symmetric = TRUE )

# load the download_cached and related functions
# to prevent re-downloading of files once they've been downloaded.
source_url(
  "https://raw.githubusercontent.com/ajdamico/asdfree/master/Download%20Cache/download%20cache.R" ,
  prompt = FALSE ,
  echo = FALSE
)

eigen( xyz , symmetric = TRUE )

# create a temporary file containing the census bureau's
# 2010 census tract to 2010 puma crosswalk
# then download the file.
ctpxw.tf <- tempfile()

eigen( xyz , symmetric = TRUE )

download_cached(
  "http://www2.census.gov/geo/docs/maps-data/data/rel/2010_Census_Tract_to_2010_PUMA.txt" ,
  ctpxw.tf ,
  mode = 'wb'
)
# note: to re-download a file from scratch, add the parameter usecache = FALSE

eigen( xyz , symmetric = TRUE )

# import this csv file into an R data.frame object
ctpxw <- read.csv( ctpxw.tf )

eigen( xyz , symmetric = TRUE )

# match the column names of sf1 and of the `sas` output
names( ctpxw ) <- c( 'state' , 'county' , 'tract' , 'puma' )

# immediately limit this to alaskan census tracts (state fips 2)
ak.ctpxw <- subset( ctpxw , state == 2 )

# clear up RAM
rm( ctpxw ) ; gc()

eigen( xyz , symmetric = TRUE )

# create a temporary file containing the census bureau's
# 2010 census summary file #1 for alaska
# then download the file.
sf1ak.tf <- tempfile()

download_cached(
  "ftp://ftp2.census.gov/census_2010/04-Summary_File_1/Alaska/ak2010.sf1.zip" ,
  sf1ak.tf ,
  mode = 'wb'
)
# note: to re-download a file from scratch, add the parameter usecache = FALSE

eigen( xyz , symmetric = TRUE )

# unzip the summary file #1 files
sf1ak.uz <- unzip( sf1ak.tf , exdir = tempdir() )

# fixed-width file layout from http://www.census.gov/prod/cen2010/doc/sf1.pdf#page=18
# (negative widths skip fields that are not needed)
sf1ak <- read.fwf( sf1ak.uz[ grep( "akgeo2010" , sf1ak.uz ) ] , c( -8 , 3 , -16 , 2 , 3 , -22 , 6 , 1 , 4 , -253 , 9 , -9 , 11 , 12 ) )

# add columns names matching the census bureau, so it's easy to read
names( sf1ak ) <- c( "sumlev" , "state" , "county" , "tract" , "blkgrp" , "block" , "pop100" , "intptlat" , "intptlon" )

# summary level 101 has census tracts and census blocks
sf1ak.101 <- subset( sf1ak , sumlev == "101" )

eigen( xyz , symmetric = TRUE )

# merge the block-level geography onto the tract-to-puma crosswalk
sf1ak.101 <- merge( sf1ak.101 , ak.ctpxw )

# the number of records and population sums serve
# as a check to confirm that this merge worked
# one record per census block in alaska.  see?  same number.
nrow( sf1ak.101 )
# https://www.census.gov/geo/maps-data/data/tallies/census_block_tally.html

# and guess what?  the total alaska population matches as well.
sum( sf1ak.101$pop100 )
# http://quickfacts.census.gov/qfd/states/02000.html

# clear up RAM
rm( sf1ak ) ; gc()

eigen( xyz , symmetric = TRUE )

# so now we have a data.frame object with
# one record per census block,
# and also with the geography (puma)
# that matches the american community survey
head( sf1ak.101 )

# and guess what?
# we've now got the census 2010 weighted populations (field pop100)
# and also each census block's centroid latitude & longitude (fields intptlat + intptlon)

# # end of step 3 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # step 4: merge the results of your survey analysis with the small-area geography # #

# confirm that we've created all possible geographies correctly.

# the number of records in our small area statistics..
sas.row <- nrow( sas )

# crash repro matrix, redefined here so this step runs on its own.
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
# eigen( symmetric = TRUE ) exercises the same internal LAPACK La_rs routine
# as the original non-API .Internal(La_rs()) call.
eigen( xyz , symmetric = TRUE )

# ..should equal the number of unique-match-merged records..
mrow <- nrow( merge( unique( sf1ak.101[ "puma" ] ) , sas ) )

# ..and it does/they do.
stopifnot( sas.row == mrow )

# now the census block-level alaska census data *could* merge if you wanted it to.

eigen( xyz , symmetric = TRUE )

# but you don't.  yet.

# the standard error (the `se.` fields) are measures of precision.
print( sas )
# the smaller the standard error, the more confident you should be
# that the estimate at a particular geography is correct.

eigen( xyz , symmetric = TRUE )

# so invert them.  you heard me.  invert them.
sas$invse.gulf <- 1 / sas$se.gulf
sas$invse.vietnam <- 1 / sas$se.vietnam
sas$invse.other <- 1 / sas$se.other
# a smaller standard error indicates more precision.

# for our purposes, precision can be considered weight! #

# now we've got the weight that we should give each of our estimates #

# distribute that weight across all census blocks #

# aggregate the 2010 census block populations to the geographies that you have.
popsum <- aggregate( sf1ak.101$pop100 , by = ( sf1ak.101[ "puma" ] ) , sum )

eigen( xyz , symmetric = TRUE )

# make the column name meaningful
names( popsum )[ names( popsum ) == 'x' ] <- 'popsum'

# merge the popsum onto the sasfile
sas <- merge( sas , popsum )

# now.  merge
# the disproportionate veteran era in each puma (the variable of interest)
# the inverted standard errors (the total weight of the broad geography)
# the population sum (the total population of all census blocks that are part of that geography)
x <- merge( sf1ak.101 , sas )

# confirm no record loss
stopifnot( nrow( x ) == nrow( sf1ak.101 ) )

eigen( xyz , symmetric = TRUE )

# (this is the fun part)
# calculate the weight at each census block:
# each block gets its puma's inverted standard error,
# scaled by the block's share of the puma population
x$weight.gulf <- x$invse.gulf * ( x$pop100 / x$popsum )
x$weight.vietnam <- x$invse.vietnam * ( x$pop100 / x$popsum )
x$weight.other <- x$invse.other * ( x$pop100 / x$popsum )

eigen( xyz , symmetric = TRUE )

# note that weight of all census blocks put together
# sums to the `invse` on the original analysis file.
# use all.equal() rather than `==` -- these are floating point sums,
# so exact equality is not guaranteed.
stopifnot( isTRUE( all.equal( sum( x$weight.gulf ) , sum( sas$invse.gulf ) ) ) )
stopifnot( isTRUE( all.equal( sum( x$weight.vietnam ) , sum( sas$invse.vietnam ) ) ) )
stopifnot( isTRUE( all.equal( sum( x$weight.other ) , sum( sas$invse.other ) ) ) )

eigen( xyz , symmetric = TRUE )

# remove records with zero population across all three measures
x <- subset( x , weight.gulf > 0 | weight.vietnam > 0 | weight.other > 0 )

eigen( xyz , symmetric = TRUE )

# scale all weights so that they average to one
# ( the original script's `x$weight.viet` relied on `$` partial matching --
#   spelled out in full here to avoid that footgun )
x$weight.gulf <- x$weight.gulf / mean( x$weight.gulf )
x$weight.vietnam <- x$weight.vietnam / mean( x$weight.vietnam )
x$weight.other <- x$weight.other / mean( x$weight.other )

# you're done preparing your data.

# keep only the columns you need.
x <- x[ , c( 'gulf', 'vietnam' , 'other' , 'weight.gulf' , 'weight.vietnam' , 'weight.other' , 'intptlat' , 'intptlon' ) ]

# pop quiz: which states are the furthest north, east, south, west?
# if you guessed alaska, maine, hawaii, alaska, you are wrong!
# the answer is alaska, alaska, hawaii, alaska.
# a few of the aleutians cross the international date line.

# do you want to keep the edges of the aleutian islands in your map?
# of course you do!  here's an ultra-simple recode to keep them gridded together:
# shift positive (eastern-hemisphere) longitudes below -180 so the state is contiguous.
x <- transform( x , intptlon = ifelse( intptlon > 0 , intptlon - 360 , intptlon ) )

eigen( xyz , symmetric = TRUE )

# # end of step 4 # #
# # # # # # # # # # #
# # # # # # # # # # # #
# # step 5: outline # #

library(maptools)
library(raster)
library(rgeos)
library(rgdal)
library(ggplot2)

# crash repro matrix, redefined here so this step runs on its own.
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
# eigen( symmetric = TRUE ) exercises the same internal LAPACK La_rs routine
# as the original non-API .Internal(La_rs()) call.
eigen( xyz , symmetric = TRUE )

# make a character vector containing the shapefiles to download
shftd <-
  c(
    # download the clipped alaska public use microdata area map, described
    # https://www.census.gov/geo/maps-data/maps/2010puma/st02_ak.html
    'http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_02_puma10_500k.zip' ,
    # download the clipped nationwide state outlines
    'http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_us_state_500k.zip' ,
    # download the roads in alaska
    'http://www2.census.gov/geo/tiger/TIGER2013/PRISECROADS/tl_2013_02_prisecroads.zip'
  )

eigen( xyz , symmetric = TRUE )

# initiate a function to download and import a census bureau shapefile.
# fn: url of a zipped shapefile.  myproj: proj4 string to project into.
# returns the imported layer, projected with spTransform().
daiacbsf <-
  function( fn , myproj = "+init=epsg:2163" ){
    tf <- tempfile()
    # # note: to re-download a file from scratch, add the parameter usecache = FALSE # #
    download_cached( fn , tf , mode = 'wb' )
    # unzip the downloaded file to a temporary directory
    shp.uz <- unzip( tf , exdir = tempdir() )
    # figure out which filename ends with "shp"
    sfname <- grep( 'shp$' , shp.uz , value = TRUE )
    # read in the shapefile, using the correct layer
    sf <- readOGR( sfname , layer = gsub( "\\.shp" , "" , basename( sfname ) ) )
    # project this shapefile immediately
    # this projection (and a few others) keeps
    # the aleutian islands that cross the
    # international date line easy to work with.
    spTransform( sf , CRS( myproj ) )
  }

eigen( xyz , symmetric = TRUE )

# run all downloads at once, store the results in a list.
# lapply (not sapply) guarantees a list result regardless of input
asf <- lapply( shftd , daiacbsf )

eigen( xyz , symmetric = TRUE )

# pull out the clipped state borders of alaska only
alaska.borders <- subset( asf[[2]] , STATEFP == '02' )

# plot as-is.  see how the aleutians screw up the map?
plot( alaska.borders )

eigen( xyz , symmetric = TRUE )

# add puma boundaries
plot( asf[[1]] , add = TRUE )

eigen( xyz , symmetric = TRUE )

# refresh the map with state borders only
plot( alaska.borders )

eigen( xyz , symmetric = TRUE )

# add roads
plot( asf[[3]] , add = TRUE , col = 'red' )

eigen( xyz , symmetric = TRUE )

# draw a rectangle 30% bigger than the original state
# ( 1.3 * extent adds 15% on each side )
ak.shp.blank <- as( 1.3 * extent( alaska.borders ) , "SpatialPolygons" )

eigen( xyz , symmetric = TRUE )

# calculate the difference between the rectangle and the actual shape
ak.shp.diff <- gDifference( ak.shp.blank , alaska.borders )
# this will be used to cover up points outside of alaska's state borders

eigen( xyz , symmetric = TRUE )

# this box will later blank out the surrounding area
plot( ak.shp.diff )

eigen( xyz , symmetric = TRUE )

# # end of step 5 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # | |
# # step 6: tie knots and krige # # | |
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L)) | |
.Internal(La_rs(xyz,FALSE)) | |
library(sqldf) | |
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L)) | |
.Internal(La_rs(xyz,FALSE)) | |
# # warning warning # # # # warning warning # # | |
# alaska has a vast geography and highly skewed population centers | |
# kriging functions might not converge. that's why there are other options ;) | |
# # warning warning # # # # warning warning # # | |
# how many knots should you make? # | |
# knots are the computationally-intensive part of this process, | |
# choose as many as your computer and your patience can handle. | |
# you should aim for between 100 - 999 knots, | |
# but numbers closer to 1,000 will overload smaller computers | |
# you could let the `fields` package attempt to guess knots for you, | |
# xknots <- cover.design( cbind( x$intptlon , x$intptlat ) , 100 )$design | |
# but with census microdata, you've already got easy access to a relevant geographic grouping | |
# the sqldf() function doesn't like `.` in data.frame object names | |
sf1s <- sf1ak.101 | |
# (bughunt repro) symmetric eigendecomposition via the public eigen()
# wrapper -- .Internal(La_rs(...)) is not part of R's API and can crash
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# exactamundo same transform operation as you saw previously on `x`:
# shift positive longitudes west by 360 degrees so the aleutian islands
# do not straddle the international date line on the map
sf1s <- transform( sf1s , intptlon = ifelse( intptlon > 0 , intptlon - 360 , intptlon ) )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# within each county x census tract
# calculate the population-weighted mean of the coordinates
# (these tract centroids become the knot locations for the Krig() fits)
ct.knots <-
	sqldf(
		"select
			county , tract ,
			sum( pop100 ) as pop100 ,
			sum( pop100 * intptlon ) / sum( pop100 ) as intptlon ,
			sum( pop100 * intptlat ) / sum( pop100 ) as intptlat
		from sf1s
		group by
			county , tract"
	)
# note: this screws up coordinates that cross the international date line
# or the equator. in the united states, only alaska's aleutian islands do this
# and we're mapping alaska, aren't we? good thing we fixed it, huh?
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# interpolation option one #
library(fields)
# (bughunt repro) use the supported eigen() wrapper rather than the
# unsupported .Internal(La_rs(...)) entry point
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# kriging fit: disproportionate gulf-war-veteran share as a smooth
# function of longitude/latitude, weighted by the survey weights
krig.fit.gulf <-
	Krig(
		cbind( x$intptlon , x$intptlat ) ,
		Y = x$gulf ,
		weights = x$weight.gulf ,
		knots = cbind( ct.knots$intptlon , ct.knots$intptlat )
		# if you prefer to use cover.design, all you'd need is this knots= line instead:
		# knots = xknots
	)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# same fit for the vietnam-era share
# (consistency: name the `Y =` argument as in the gulf fit above)
krig.fit.vietnam <-
	Krig(
		cbind( x$intptlon , x$intptlat ) ,
		Y = x$vietnam ,
		weights = x$weight.vietnam ,
		knots = cbind( ct.knots$intptlon , ct.knots$intptlat )
		# if you prefer to use cover.design, all you'd need is this knots= line instead:
		# knots = xknots
	)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# same fit for all remaining service eras
krig.fit.other <-
	Krig(
		cbind( x$intptlon , x$intptlat ) ,
		Y = x$other ,
		weights = x$weight.other ,
		knots = cbind( ct.knots$intptlon , ct.knots$intptlat )
		# if you prefer to use cover.design, all you'd need is this knots= line instead:
		# knots = xknots
	)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# that is: what is the (weighted) relationship between
# your variable of interest (veteran service eras) and
# the x/y points on a grid?
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# check this out!
surface( krig.fit.gulf )
surface( krig.fit.vietnam )
surface( krig.fit.other )
# you're almost there!
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# interpolation option two #
library(mgcv)
# (bughunt repro) use the supported eigen() wrapper rather than the
# unsupported .Internal(La_rs(...)) entry point
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# generalized additive model: the same smooth-surface idea as Krig(),
# but fitted with a thin-plate spline over lon/lat
gam.gulf <-
	gam(
		gulf ~ s( intptlon , intptlat ) ,
		weights = weight.gulf ,
		data = x
	)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
gam.vietnam <-
	gam(
		vietnam ~ s( intptlon , intptlat ) ,
		weights = weight.vietnam ,
		data = x
	)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
gam.other <-
	gam(
		other ~ s( intptlon , intptlat ) ,
		weights = weight.other ,
		data = x
	)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# # end of step 6 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # #
# # step 7: make a grid and predict # #
# use as fine of a grid as your computer can handle
grid.length <- 750
# # note: smaller grids will render faster
# # (so they're better if you're just playing around)
# # but larger grids will prevent your final plot from
# # being too pixelated, even when zooming in
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# bounding box of the observed puma centroids
x.range <- c( min( x$intptlon ) , max( x$intptlon ) )
y.range <- c( min( x$intptlat ) , max( x$intptlat ) )
# add twenty percent on each side
# (the 0.2 multiplier below is 20%, not the 5% the old comment claimed)
x.diff <- abs( x.range[ 2 ] - x.range[ 1 ] ) * 0.2
y.diff <- abs( y.range[ 2 ] - y.range[ 1 ] ) * 0.2
x.range[ 1 ] <- x.range[ 1 ] - x.diff
x.range[ 2 ] <- x.range[ 2 ] + x.diff
y.range[ 1 ] <- y.range[ 1 ] - y.diff
y.range[ 2 ] <- y.range[ 2 ] + y.diff
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# one grid.length x grid.length lattice, copied three times:
# `grd` untouched, `krig.grd` and `gam.grd` to receive predictions
grd <- krig.grd <- gam.grd <-
	expand.grid(
		intptlon = seq( x.range[ 1 ] , x.range[ 2 ] , length = grid.length ) ,
		intptlat = seq( y.range[ 1 ] , y.range[ 2 ] , length = grid.length )
	)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# along your rectangular grid,
# what are the predicted values of
# each veteran era category
krig.grd$gulf <- predict( krig.fit.gulf , krig.grd[ , 1:2 ] )
krig.grd$vietnam <- predict( krig.fit.vietnam , krig.grd[ , 1:2 ] )
krig.grd$other <- predict( krig.fit.other , krig.grd[ , 1:2 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
gam.grd$gulf <- predict( gam.gulf , gam.grd[ , 1:2 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
gam.grd$vietnam <- predict( gam.vietnam , gam.grd[ , 1:2 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
gam.grd$other <- predict( gam.other , gam.grd[ , 1:2 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# remember that these values have been re-scaled
# as how disproportionate they are from the state-wide averages.
# therefore, negative values are possible.
sapply( krig.grd , summary )
sapply( gam.grd , summary )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# what we're really hoping for is that
# the overall mean averages out to zero
sum( sapply( gam.grd , summary )[ 4 , 3:5 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# in general, these predictions at each point should approximately sum to zero
summary( rowSums( krig.grd[ , 3:5 ] ) )
summary( rowSums( gam.grd[ , 3:5 ] ) )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
eigen(xyz, symmetric = TRUE, only.values = FALSE)
# # end of step 7 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # #
# # step 8: limit information and color # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # warning # # # warning # # # # # # warning # # # # # # warning # # # # # # warning # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# if your data are not binomial, then by mapping with a single image, you lose clarity    #
# if you have three levels of information and you generate two maps, you can get an idea  #
# about the entire distribution of the variable. if you attempt encoding three levels or  #
# more into a single map, you will explode. just kidding rofl lmao but you will lose info #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # warning # # # warning # # # # # # warning # # # # # # warning # # # # # # warning # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
library(scales)
# from among the three categories (columns 3:5), find which one has the
# maximum disproportionate share at each grid point
krig.grd$svccat <- c( 'gulf' , 'vietnam' , 'other' )[ apply( krig.grd[ , 3:5 ] , 1 , which.max ) ]
# save only that max
krig.grd$statistic <- apply( krig.grd[ , 3:5 ] , 1 , max )
# it's important to note that i've thrown out a lot of information here
krig.grd <- krig.grd[ , c( 'intptlon' , 'intptlat' , 'statistic' , 'svccat' ) ]
# do any points not make sense?
summary( krig.grd$statistic )
# yup, the minimum is below zero.
# floor the statistic at zero so the color gradients below never
# receive a negative share
krig.grd$statistic <- pmax( 0 , krig.grd$statistic )
# from among the three categories, find the maximum disproportionate share
gam.grd$svccat <- c( 'gulf' , 'vietnam' , 'other' )[ apply( gam.grd[ , 3:5 ] , 1 , which.max ) ]
# save only that max
gam.grd$statistic <- apply( gam.grd[ , 3:5 ] , 1 , max )
# it's important to note that i've thrown out a lot of information here
gam.grd <- gam.grd[ , c( 'intptlon' , 'intptlat' , 'statistic' , 'svccat' ) ]
# again, do any points not make sense?
summary( gam.grd$statistic )
# another point below zero.
gam.grd$statistic <- pmax( 0 , gam.grd$statistic )
# our complex sample survey-computed statistics rely on categories,
# but the final map only shows the _highest_ disproportionate item
# from each puma. for example,
# puma 300 is slightly disproportionately more gulf veterans
# and it's also near evenly-split between being
# slightly disproportionately less vietnam vets and
# slightly disproportionately less other era vets
# puma 101 is disproportionately more gulf vets too,
# but it has very heavily disproportionately fewer vietnam vets
# and has close to state-average veterans from other eras.
# NOTE(review): `sas` is an object built earlier in the full script (not
# visible in this excerpt); printing it here presumably shows the per-puma
# shares discussed above -- confirm against the complete script
sas
# only the "disproportionately more" share variable gets retained
# in these predictions. all other information gets thrown away.
# this is the nature of mapping categorical variables
# if you are intent on showing a multi-color gradient with all information,
# you can use the rgb() function, but fair warning:
# the values mush together quickly and your map will probably look like ass.
# i tried building color gradients to map multi-dimensional categorical values
# like the multi-category values in
# NOTE(review): `ps` is also defined earlier in the full script; the rgb()
# calls below read its columns 2:4 as red/green/blue intensities
ps
# but on the color gradient, the red/green/blue values on the palette tend to mush together.
# for example, on this plot right here..
plot( 1:5 , rep( 1 , 5 ) , cex = 3 , pch = 16 , col = mapply( rgb , ps[ , 2 ] , ps[ , 3 ] , ps[ , 4 ] ) )
# red is gulf veterans, green is vietnam veterans, blue is other veterans.
# the colors end up just looking drab.
# even when re-scaled..
rsps <- apply( ps[ , 2:4 ] , 2 , rescale )
# ..the points with high relative rates in two categories (because they're the lowest in the third)
# have a lot of mixture (puma 300) and are therefore indecipherable. what is the color brown here?
plot( 1:5 , rep( 1 , 5 ) , cex = 3 , pch = 16 , col = mapply( rgb , rsps[ , 1 ] , rsps[ , 2 ] , rsps[ , 3 ] ) )
# high vietnam era and also high gulf era service.
text( 1:5 , rep( 1.2 , 5 ) , ps[ , 1 ] )
# multi-dimensional categorical variable coloring is a nightmare.
# you have to simplify it.
# simplifying it means throwing out information.
# now where were we?
library(RColorBrewer)
# draw three gradients: for each of the three 'Set1' base colors,
# a 101-step ramp from white up to the fully saturated color
tg <-
	lapply(
		brewer.pal( 3 , 'Set1' ) ,
		function( z ) colorRampPalette( c( 'white' , z ) )( 101 )
	)
# check out each of these three colors, mapped from opaque to intense.
plot( rep( 0:100 , 3 ) , rep( c( -1 , 0 , 1 ) , each = 101 ) , col = unlist( tg ) , pch = 16 , cex = 3 )
# draw an alternate three gradients
# that start at ~20% ( that is: 25 / 125 )
# and also use a different palette from colorbrewer2.org
tag <-
	lapply(
		brewer.pal( 3 , 'Dark2' ) ,
		function( z ) colorRampPalette( c( 'white' , z ) )( 125 )[ 25:125 ]
	)
# check out each of these three colors, mapped from opaque to intense.
plot( rep( 0:100 , 3 ) , rep( c( -1 , 0 , 1 ) , each = 101 ) , col = unlist( tag ) , pch = 16 , cex = 3 )
# # rescale both of the interpolated grids so the maximum statistic is 1
krig.grd$statistic <- krig.grd$statistic * ( 1 / max( krig.grd$statistic ) )
gam.grd$statistic <- gam.grd$statistic * ( 1 / max( gam.grd$statistic ) )
# note that the re-scaling gets done across all categories,
# and not individually within each category.
# add the hex color identifier.
# bugfix: `statistic` was floored at zero earlier, so round( statistic * 100 )
# can be 0 -- and a 0 index silently *drops* elements from the gradient
# vector, misaligning every color that ifelse() recycles afterward.
# lower-bound the index at 1 (the white end of the ramp) instead.
krig.grd$color.value <-
	ifelse( krig.grd$svccat == 'gulf' , tg[[1]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'vietnam' , tg[[2]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'other' , tg[[3]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	NA ) ) )
# awwwwwwww yeah, something's happening now.
plot( krig.grd$intptlon , krig.grd$intptlat , col = krig.grd$color.value , pch = 16 , cex = 3 )
# add the alternate hex color identifier (same index lower bound as above)
krig.grd$alt.color <-
	ifelse( krig.grd$svccat == 'gulf' , tag[[1]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'vietnam' , tag[[2]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'other' , tag[[3]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	NA ) ) )
# that looks a bit better to me
plot( krig.grd$intptlon , krig.grd$intptlat , col = krig.grd$alt.color , pch = 16 , cex = 3 )
# lower-bound the alternate color at step 5 to remove the white lines
krig.grd$bound.color <-
	ifelse( krig.grd$svccat == 'gulf' , tag[[1]][ pmax( 5 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'vietnam' , tag[[2]][ pmax( 5 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'other' , tag[[3]][ pmax( 5 , round( krig.grd$statistic * 100 ) ) ] ,
	NA ) ) )
# that's smoothing by hand for you.
plot( krig.grd$intptlon , krig.grd$intptlat , col = krig.grd$bound.color , pch = 16 , cex = 3 )
# put that color band on the `gam.grd` data.frame as well
gam.grd$bound.color <-
	ifelse( gam.grd$svccat == 'gulf' , tag[[1]][ pmax( 5 , round( gam.grd$statistic * 100 ) ) ] ,
	ifelse( gam.grd$svccat == 'vietnam' , tag[[2]][ pmax( 5 , round( gam.grd$statistic * 100 ) ) ] ,
	ifelse( gam.grd$svccat == 'other' , tag[[3]][ pmax( 5 , round( gam.grd$statistic * 100 ) ) ] ,
	NA ) ) )
# # end of step 8 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # #
# # step 9: ggplot and choose options # #
library(ggplot2)
library(mapproj)
library(scales)
# initiate the krige-based plot
# NOTE(review): the hex color strings are converted to a *factor* here and
# then passed as a fixed `colour` outside aes() below -- factors used as
# colors are a classic footgun (they can be interpreted by their integer
# codes rather than their labels); confirm the rendered colors are correct
krig.grd$color.column <- as.factor( krig.grd$bound.color )
krg.plot <-
	ggplot( data = krig.grd , aes( x = intptlon , y = intptlat ) ) +
	geom_point( shape = 15 , colour = krig.grd$color.column ) +
	scale_fill_manual( values = unique( krig.grd$bound.color ) )
# initiate the gam-based plot
# NOTE(review): shape 15 points are drawn via `colour`, not `fill`, so this
# scale_fill_manual() presumably has no visible effect -- verify
gam.grd$color.column <- as.factor( gam.grd$bound.color )
gam.plot <-
	ggplot( data = gam.grd , aes( x = intptlon , y = intptlat ) ) +
	geom_point( shape = 15 , colour = gam.grd$color.column ) +
	scale_fill_manual( values = unique( gam.grd$bound.color ) )
# view both grids!
krg.plot
gam.plot
# initiate the entire plot
the.plot <-
	# choose only one of the two interpolation grids
	krg.plot +
	# gam.plot +
	# blank out the legend and axis labels
	theme(
		legend.position = "none" ,
		axis.title.x = element_blank() ,
		axis.title.y = element_blank()
	) +
	xlab( "" ) + ylab( "" ) +
	# force the x and y axis limits at the shape of the city and don't do anything special for off-map values
	scale_x_continuous( limits = c( -191 , -127 ) , breaks = NULL , oob = squish ) +
	# since we're going to add lots of surrounding-area detail!
	scale_y_continuous( limits = c( 50 , 73 ) , breaks = NULL , oob = squish ) +
	theme(
		panel.grid.major = element_blank(),
		panel.grid.minor = element_blank(),
		panel.background = element_blank(),
		panel.border = element_blank(),
		axis.ticks = element_blank()
	)
# print the plot to the screen
the.plot
# this is the bottom layer.
# helper that wraps aleutian-island coordinates around the international
# date line: any positive longitude in the `long` column is shifted west
# by 360 degrees so the whole chain plots on one side of the map.
# takes and returns a fortified data.frame with a `long` column.
s360 <- function( z ) {
	east.of.the.line <- z$long > 0
	z$long[ east.of.the.line ] <- z$long[ east.of.the.line ] - 360
	z
}
# # alaskan state borders # #
# convert the alaskan borders to longlat,
# prepare for ggplot2 with `fortify`
# wrap edge points around the date line with s360()
ab <- s360( fortify( spTransform( alaska.borders , CRS( "+proj=longlat" ) ) ) )
# store this information in a layer
state.border.layer <- geom_path( data = ab , aes( x = long , y = lat , group = group ) , colour = 'darkgrey' )
# plot the result
the.plot + state.border.layer
# # alaskan main roads # #
# convert the alaskan roads to longlat,
# prepare for ggplot2 with `fortify`
# wrap edge points around
# (`asf` is a list of shapefiles loaded earlier in the full script;
# element [[3]] presumably holds the road lines -- confirm upstream)
akr <- s360( fortify( spTransform( asf[[3]] , CRS( "+proj=longlat" ) ) ) )
# store this information in a layer
state.roads.layer <- geom_path( data = akr , aes( x = long , y = lat , group=group ) , colour = 'darkred' )
# plot the result
the.plot + state.border.layer + state.roads.layer
# # end of step 9 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # #
# # step 10: project, blank, and save # #
library(ggplot2)
library(scales)
library(raster)
library(plyr)
library(rgeos)
# exclude outer alaska if you hate the wilderness or something
the.plot + state.border.layer + coord_cartesian( xlim = c( -155 , max( x$intptlon ) ) , ylim = c( min( x$intptlat ) , 70 ) )
# distort the map with simple latitude/longitude scaling
the.plot + state.border.layer + coord_fixed( 2.5 )
# this looks crappy, who knows what it is
the.plot + state.border.layer + coord_equal()
# check out a bunch of other options #
the.plot + state.border.layer + coord_map( project = "cylequalarea" , mean( x$intptlat ) )
# here's the one that makes the most sense for alaska
the.plot + state.border.layer + coord_map( project = "conic" , mean( x$intptlat ) , orientation = c( 90 , 0 , -141 ) )
# see ?mapproject and the ?coord_* functions for a zillion alternatives
# store this projection, but not the state border
the.plot <- the.plot + coord_map( project = "conic" , mean( x$intptlat ) , orientation = c( 90 , 0 , -141 ) )
# into `the.plot`
# force the difference shapefile's projection
# NOTE(review): assigning proj4string() only *declares* the CRS, it does not
# reproject coordinates -- presumably ak.shp.diff genuinely is in epsg:2163;
# confirm before trusting the spTransform() below
proj4string( ak.shp.diff ) <- "+init=epsg:2163"
# initiate the outside blanking layer
outside <- s360( fortify( spTransform( ak.shp.diff , CRS( "+proj=longlat" ) ) ) )
# fix islands piecing together by closing each piece back to the first point
# (the anonymous-function argument `x` shadows the survey data.frame `x`
# used elsewhere in this script; here it is only the per-piece chunk)
outside2 <- ddply( outside , .( piece ) , function( x ) rbind( x , outside[ 1 , ] ) )
# convert this fortified object to a ggplot layer
outside.layer <- geom_polygon( data = outside2 , aes( x = long , y = lat , group = id ) , fill = 'white' )
# plot this -- the layer doesn't work, does it?
the.plot + outside.layer
# five points need to change so we have a real bounding box.
subset( outside , lat < 45 | lat > 75 | long < -190 | long > -125 )
# move all of them counter-clockwise by hand
# (hard-coded coordinates below were read off the subset() output above)
outside[ outside$order %in% c( 1 , 5 ) , 'long' ] <- -116.6568
# outside[ outside$order %in% c( 1 , 5 ) , 'lat' ] <- 20
# outside[ outside$order %in% 4 , 'long' ] <- -220
outside[ outside$order %in% 4 , 'lat' ] <- 37.56767
outside[ outside$order %in% 3 , 'long' ] <- -195.4295
# outside[ outside$order %in% 3 , 'lat' ] <- 100
# outside[ outside$order %in% 2 , 'long' ] <- -100
outside[ outside$order %in% 2 , 'lat' ] <- 79.36447
# fix islands piecing together
outside2 <- ddply( outside , .( piece ) , function( x ) rbind( x , outside[ 1 , ] ) )
# convert this fortified object to a ggplot layer
outside.layer <- geom_polygon( data = outside2 , aes( x = long , y = lat , group = id ) , fill = 'white' )
# plot this.
the.plot + outside.layer
# that's not so bad, i guess.
# i don't care for the state border layer,
# but if you want the state border layer,
# use this save line:
final.plot <- the.plot + outside.layer + state.border.layer
# otherwise use this save line:
# final.plot <- the.plot + outside.layer
# you can airbrush the outside blue border
# in microsoft paint or something
# if you want, right? like a boss.
# save the file to your current working directory
ggsave(
	"2013 alaskan veteran service eras.png" ,
	plot = final.plot ,
	scale = 3
)
# happy?
# # end of step ten # #
# # # # # # # # # # # #
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment