Skip to content

Instantly share code, notes, and snippets.

@jduckles
Created March 24, 2010 21:56
Show Gist options
  • Save jduckles/342876 to your computer and use it in GitHub Desktop.
Save jduckles/342876 to your computer and use it in GitHub Desktop.
#!/bin/bash
## Jonah Duckles - [email protected]
## BASH & R Code to download census participation rates and make a map of census progress.
## Can be run daily to see how the census is progressing
## Requires library(maptools) library(RColorBrewer) and library(classInt) to be installed in R
## Requires gdal/ogr (www.gdal.org) and spatialite (http://www.gaia-gis.it/spatialite/) tools in your path
##
## Example output: http://jduck.net/files/participation2010.png
#### BEGIN DATA PREP ####
# Get cenusus participation rate data -N for timestamp checking
wget -N "http://2010.census.gov/2010census/take10map/downloads/participationrates2010.txt"
# Apply a header to our new output file of just counties, the census dataset has tracts, states counties etc.
echo "fips|county|desc|y2000|y2010" > counties.txt
# Extract only the counties and mangle it into csv-ish format
grep "||County ||" participationrates2010.txt | sort -n | sed 's/\|\| /\|/g;s/ \|\|/\|/g' >> counties.txt
# Get shapefile of counties
wget -nc "http://dds.cr.usgs.gov/pub/data/nationalatlas/countyp020.tar.gz"
if [ -f "countyp020.shp" ]; then
echo "We already have counties"
else
# unarchive counties from national atlas
tar -xzvf countyp020.tar.gz
fi
# Use spatialite to join counties.txt with countyp020.shp (on fips code)
# and drop new shp with census data attached, then reproject using OGR
# This removes the crazy R gymnastics with merge() etc.
spatialite <<EOF
.mode csv
.separator |
select InitSpatialMetaData();
INSERT INTO spatial_ref_sys (srid, auth_name, auth_srid, ref_sys_name, proj4text) VALUES (4269, 'epsg', 4269, 'NAD83', '+proj=longlat +ellps=GRS80 +datum=NAD83 +no_defs');
create table census_status (fips character(5), county varchar(50), desc varchar(20), y2000 numeric(5,2), y2010 numeric(5,2));
.import counties.txt census_status
CREATE virtual table virtual_counties using VirtualShape(countyp020,UTF-8,4269);
-- Join shp and tabular data from census.gov, removing great lakes polygons (cofips = "000")
create table export as select a.Geometry, a.*, b.* from virtual_counties as a left join (select fips, county, desc, y2000, y2010 from census_status) as b using(fips) where substr(a.FIPS,3,3) != "000";
.dumpshp export Geometry export UTF-8 POLYGON
EOF
# Pass through OGR to reproject to a more pleasing and non-geographic projection.
aeana="+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs" # Albers conic projection for north america
ogr2ogr -overwrite -s_srs EPSG:4269 -t_srs "${aeana}" -f "ESRI Shapefile" export_counties.shp export.shp
# Cleanup
rm export.*
#### END DATA PREP ####
#### BEGIN R CODE ####
# Quote "RCODE" in here document to prevent $ being expanded as shell variables.
R --slave << "RCODE"
# Make sure these are installed in R before you run
library(maptools)
library(RColorBrewer)
library(classInt)
c <- readShapePoly('export_counties.shp')
nclr <- 9
colpal <- brewer.pal(nclr,"Blues")
class <- classIntervals(c$y2010, nclr, style="quantile")
pcol <- findColours(class,colpal)
pdf('participation2010.pdf', width=17, height=11)
# Plot the map with pcols. xlim, ylim zoom us in to conus, AK, HI and PR are in the dataset as well.
plot(c, col=pcol, border="gray",xlim=c(-2652453,2784842),ylim=c(-1862042,1549076))
legend("topleft", legend=names(attr(pcol, "table")), fill=attr(pcol, "palette"), cex = 0.75, bty = "n")
dev.off()
RCODE
#### END R CODE ####
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment